//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
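  // For illustration (a sketch, not code emitted verbatim): in the outlined
  // function of an untied task with one task scheduling point, the action
  // above yields control flow of roughly this shape:
  //
  //     switch (*part_id) {
  //     default: goto .untied.done.;  // task already finished
  //     case 0:  goto .untied.jmp.0;  // first invocation of the task
  //     case 1:  goto .untied.jmp.1;  // resume after the scheduling point
  //     }
  //
  // At each scheduling point, emitUntiedSwitch() stores the number of the next
  // case into *part_id, re-enqueues the task (UntiedCodeGen emits the
  // __kmpc_omp_task call), and branches out through cleanups, so the next
  // invocation resumes at the matching case label.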
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up in a
    // list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
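// For illustration: these values compose as bitmasks, e.g.
// OMP_IDENT_BARRIER_IMPL_SECTIONS (0xC0) = OMP_IDENT_BARRIER_IMPL (0x40) | 0x80
// and OMP_IDENT_BARRIER_IMPL_SINGLE (0x140) = 0x40 | 0x100, so the runtime can
// distinguish which construct an implicit barrier belongs to while still
// testing the generic OMP_IDENT_BARRIER_IMPL bit.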

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon separated
///                                 fields which describe the source file,
///                                 the function and a pair of line numbers that
///                                 delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
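// For illustration: 'schedule(dynamic, 4)' on a worksharing loop is encoded
// for the runtime as OMP_sch_dynamic_chunked (35), with the chunk size 4
// passed as a separate argument; adding an 'ordered' clause selects the
// corresponding OMP_ord_* value (base + 32). Schedule modifiers are OR'ed into
// the top bits, e.g. 'schedule(monotonic: dynamic)' becomes
// OMP_sch_dynamic_chunked | OMP_sch_modifier_monotonic (35 | (1 << 29)).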

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
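// For illustration, the control flow built above looks roughly like this in
// pseudo-IR (names match the basic blocks created in the function):
//
//   entry:
//     %isempty = icmp eq ptr %dest.begin, %dest.end
//     br i1 %isempty, label %omp.arrayinit.done, label %omp.arrayinit.body
//   omp.arrayinit.body:
//     %dest.cur = phi ptr [ %dest.begin, %entry ], [ %dest.next, %omp.arrayinit.body ]
//     ; ...initialize one element at %dest.cur...
//     %dest.next = getelementptr %ElemTy, ptr %dest.cur, i32 1
//     %done = icmp eq ptr %dest.next, %dest.end
//     br i1 %done, label %omp.arrayinit.done, label %omp.arrayinit.body
//   omp.arrayinit.done: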

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
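// For illustration: with a section reduction such as
//   #pragma omp parallel for reduction(+ : a[2:6])
// the private buffer covers only elements a[2]..a[7]. Adjustment above is the
// (here negative) element distance from the section start back to the base of
// 'a', so the GEP rebases the private buffer to a pointer that can be indexed
// with the original subscripts inside the region.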

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.EmitStmt(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  Config.setDefaultTargetAS(
      CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));
  Config.setRuntimeCC(CGM.getRuntimeCC());

  OMPBuilder.setConfig(Config);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
                                     CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
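// For illustration: given a user-defined reduction like
//   #pragma omp declare reduction(merge : MyTy : omp_out.append(omp_in)) \
//       initializer(omp_priv = MyTy())
// this helper is invoked twice and produces functions roughly equivalent to
//   void .omp_combiner.(MyTy *omp_out, MyTy *omp_in) { omp_out->append(*omp_in); }
//   void .omp_initializer.(MyTy *omp_priv, MyTy *omp_orig) { *omp_priv = MyTy(); }
// (the exact names are platform-mangled via getName; 'MyTy' and 'append' are
// placeholder names for this sketch).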

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
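// For illustration: for a construct at line 10, column 3 of foo.c inside a
// function bar(), the string built above follows the kmp.h psource layout
// ";file;function;line;column;;", i.e. ";foo.c;bar;10;3;;".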

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
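// For illustration: on the slow path above, the emitted IR is a single cached
// call near the function entry, roughly
//   %0 = call i32 @__kmpc_global_thread_num(ptr @<ident>)
// while inside an outlined parallel/task region the thread id is instead
// reloaded from the kmp_int32* parameter the runtime passed to the outlined
// function.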
1469
1471 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1472 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1474 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1475 }
1476 if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
1477 for (const auto *D : I->second)
1478 UDRMap.erase(D);
1479 FunctionUDRMap.erase(I);
1480 }
1481 if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
1482 for (const auto *D : I->second)
1483 UDMMap.erase(D);
1484 FunctionUDMMap.erase(I);
1485 }
1488}
1489
1491 return OMPBuilder.IdentPtr;
1492}
1493
1494static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1496 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1497 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1498 if (!DevTy)
1499 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1500
1501 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1502 case OMPDeclareTargetDeclAttr::DT_Host:
1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1504 break;
1505 case OMPDeclareTargetDeclAttr::DT_NoHost:
1506 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1507 break;
1508 case OMPDeclareTargetDeclAttr::DT_Any:
1509 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1510 break;
1511 default:
1512 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1513 break;
1514 }
1515}
1516
1517static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1519 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1520 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1521 if (!MapType)
1522 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1523 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1524 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1525 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1526 break;
1527 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1528 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1529 break;
1530 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1531 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1532 break;
1533 default:
1534 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1535 break;
1536 }
1537}
1538
1539static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1540 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1541 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1542
1543 auto FileInfoCallBack = [&]() {
1545 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1546
1547 if (!CGM.getFileSystem()->exists(PLoc.getFilename()))
1548 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1549
1550 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1551 };
1552
1553 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack,
1554 *CGM.getFileSystem(), ParentName);
1555}
1556
1557ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1558 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1559
1560 auto LinkageForVariable = [&VD, this]() {
1561 return CGM.getLLVMLinkageVarDefinition(VD);
1562 };
1563
1564 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1565
1566 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1567 CGM.getContext().getPointerType(VD->getType()));
1568 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1569 convertCaptureClause(VD), convertDeviceClause(VD),
1570 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1571 VD->isExternallyVisible(),
1572 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1573 VD->getCanonicalDecl()->getBeginLoc()),
1574 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1575 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1576 LinkageForVariable);
1577
1578 if (!addr)
1579 return ConstantAddress::invalid();
1580 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1581}
1582
1583llvm::Constant *
1584CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1585 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1586 !CGM.getContext().getTargetInfo().isTLSSupported());
1587 // Lookup the entry, lazily creating it if necessary.
1588 std::string Suffix = getName({"cache", ""});
1589 return OMPBuilder.getOrCreateInternalVariable(
1590 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1591}
1592
1593Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1594 const VarDecl *VD,
1595 Address VDAddr,
1596 SourceLocation Loc) {
1597 if (CGM.getLangOpts().OpenMPUseTLS &&
1598 CGM.getContext().getTargetInfo().isTLSSupported())
1599 return VDAddr;
1600
1601 llvm::Type *VarTy = VDAddr.getElementType();
1602 llvm::Value *Args[] = {
1603 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1604 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1605 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1606 getOrCreateThreadPrivateCache(VD)};
1607 return Address(
1608 CGF.EmitRuntimeCall(
1609 OMPBuilder.getOrCreateRuntimeFunction(
1610 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1611 Args),
1612 CGF.Int8Ty, VDAddr.getAlignment());
1613}
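// A rough sketch of the lowering above for a threadprivate 'int x'
// (names illustrative, not the exact mangling):
//   void *p = __kmpc_threadprivate_cached(&loc, gtid, (void *)&x,
//                                         sizeof(x), &x.cache);
// where 'x.cache' stands for the per-variable cache created by
// getOrCreateThreadPrivateCache.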
1614
1615void CGOpenMPRuntime::emitThreadPrivateVarInit(
1616 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1617 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1618 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1619 // library.
1620 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1621 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1622 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1623 OMPLoc);
1624 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1625 // to register constructor/destructor for variable.
1626 llvm::Value *Args[] = {
1627 OMPLoc,
1628 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1629 Ctor, CopyCtor, Dtor};
1630 CGF.EmitRuntimeCall(
1631 OMPBuilder.getOrCreateRuntimeFunction(
1632 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1633 Args);
1634}
1635
1636llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1637 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1638 bool PerformInit, CodeGenFunction *CGF) {
1639 if (CGM.getLangOpts().OpenMPUseTLS &&
1640 CGM.getContext().getTargetInfo().isTLSSupported())
1641 return nullptr;
1642
1643 VD = VD->getDefinition(CGM.getContext());
1644 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1645 QualType ASTTy = VD->getType();
1646
1647 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1648 const Expr *Init = VD->getAnyInitializer();
1649 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1650 // Generate a function that re-emits the declaration's initializer into
1651 // the threadprivate copy of the variable VD.
1652 CodeGenFunction CtorCGF(CGM);
1653 FunctionArgList Args;
1654 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1655 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1656 ImplicitParamKind::Other);
1657 Args.push_back(&Dst);
1658
1659 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1660 CGM.getContext().VoidPtrTy, Args);
1661 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1662 std::string Name = getName({"__kmpc_global_ctor_", ""});
1663 llvm::Function *Fn =
1664 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1665 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1666 Args, Loc, Loc);
1667 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1668 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1669 CGM.getContext().VoidPtrTy, Dst.getLocation());
1670 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1671 VDAddr.getAlignment());
1672 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1673 /*IsInitializer=*/true);
1674 ArgVal = CtorCGF.EmitLoadOfScalar(
1675 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1676 CGM.getContext().VoidPtrTy, Dst.getLocation());
1677 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1678 CtorCGF.FinishFunction();
1679 Ctor = Fn;
1680 }
1681 if (VD->getType().isDestructedType() != QualType::DK_none) {
1682 // Generate a function that emits the destructor call for the
1683 // threadprivate copy of the variable VD.
1684 CodeGenFunction DtorCGF(CGM);
1685 FunctionArgList Args;
1686 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1687 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1688 ImplicitParamKind::Other);
1689 Args.push_back(&Dst);
1690
1691 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1692 CGM.getContext().VoidTy, Args);
1693 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1694 std::string Name = getName({"__kmpc_global_dtor_", ""});
1695 llvm::Function *Fn =
1696 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1697 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1698 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1699 Loc, Loc);
1700 // Create a scope with an artificial location for the body of this function.
1701 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1702 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1703 DtorCGF.GetAddrOfLocalVar(&Dst),
1704 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1705 DtorCGF.emitDestroy(
1706 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1707 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1708 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1709 DtorCGF.FinishFunction();
1710 Dtor = Fn;
1711 }
1712 // Do not emit init function if it is not required.
1713 if (!Ctor && !Dtor)
1714 return nullptr;
1715
1716 // Copying constructor for the threadprivate variable.
1717 // Must be NULL: the parameter is reserved by the runtime, which currently
1718 // requires it to always be NULL; otherwise it fires an assertion.
1719 CopyCtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1720 if (Ctor == nullptr) {
1721 Ctor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1722 }
1723 if (Dtor == nullptr) {
1724 Dtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1725 }
1726 if (!CGF) {
1727 auto *InitFunctionTy =
1728 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1729 std::string Name = getName({"__omp_threadprivate_init_", ""});
1730 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1731 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1732 CodeGenFunction InitCGF(CGM);
1733 FunctionArgList ArgList;
1734 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1735 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1736 Loc, Loc);
1737 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1738 InitCGF.FinishFunction();
1739 return InitFunction;
1740 }
1741 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1742 }
1743 return nullptr;
1744}
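// A rough sketch of what this emits for 'int x = f();' with
// '#pragma omp threadprivate(x)' (names illustrative):
//   void *__kmpc_global_ctor_.(void *p) { *(int *)p = f(); return p; }
//   void __omp_threadprivate_init_.() {
//     __kmpc_global_thread_num(&loc);
//     __kmpc_threadprivate_register(&loc, &x, ctor, /*cctor=*/NULL, dtor);
//   }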
1745
1746void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1747 llvm::GlobalValue *GV) {
1748 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1749 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1750
1751 // We only need to handle active 'indirect' declare target functions.
1752 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1753 return;
1754
1755 // Get a mangled name to store the new device global in.
1756 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1757 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1758 SmallString<128> Name;
1759 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1760
1761 // We need to generate a new global to hold the address of the indirectly
1762 // called device function. Doing this allows us to keep the visibility and
1763 // linkage of the associated function unchanged while allowing the runtime to
1764 // access its value.
1765 llvm::GlobalValue *Addr = GV;
1766 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1767 llvm::PointerType *FnPtrTy = llvm::PointerType::get(
1768 CGM.getLLVMContext(),
1769 CGM.getModule().getDataLayout().getProgramAddressSpace());
1770 Addr = new llvm::GlobalVariable(
1771 CGM.getModule(), FnPtrTy,
1772 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1773 nullptr, llvm::GlobalValue::NotThreadLocal,
1774 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1775 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1776 }
1777
1778 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1779 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1780 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1781 llvm::GlobalValue::WeakODRLinkage);
1782}
1783
1784Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1785 QualType VarType,
1786 StringRef Name) {
1787 std::string Suffix = getName({"artificial", ""});
1788 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1789 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1790 VarLVType, Twine(Name).concat(Suffix).str());
1791 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1792 CGM.getTarget().isTLSSupported()) {
1793 GAddr->setThreadLocal(/*Val=*/true);
1794 return Address(GAddr, GAddr->getValueType(),
1795 CGM.getContext().getTypeAlignInChars(VarType));
1796 }
1797 std::string CacheSuffix = getName({"cache", ""});
1798 llvm::Value *Args[] = {
1799 emitUpdateLocation(CGF, SourceLocation()),
1800 getThreadID(CGF, SourceLocation()),
1801 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1802 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1803 /*isSigned=*/false),
1804 OMPBuilder.getOrCreateInternalVariable(
1805 CGM.VoidPtrPtrTy,
1806 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1807 return Address(
1808 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1809 CGF.EmitRuntimeCall(
1810 OMPBuilder.getOrCreateRuntimeFunction(
1811 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1812 Args),
1813 CGF.Builder.getPtrTy(0)),
1814 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1815}
1816
1817void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1818 const RegionCodeGenTy &ThenGen,
1819 const RegionCodeGenTy &ElseGen) {
1820 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1821
1822 // If the condition constant folds and can be elided, try to avoid emitting
1823 // the condition and the dead arm of the if/else.
1824 bool CondConstant;
1825 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1826 if (CondConstant)
1827 ThenGen(CGF);
1828 else
1829 ElseGen(CGF);
1830 return;
1831 }
1832
1833 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1834 // emit the conditional branch.
1835 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1836 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1837 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1838 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1839
1840 // Emit the 'then' code.
1841 CGF.EmitBlock(ThenBlock);
1842 ThenGen(CGF);
1843 CGF.EmitBranch(ContBlock);
1844 // Emit the 'else' code if present.
1845 // There is no need to emit a line number for an unconditional branch.
1846 (void)ApplyDebugLocation::CreateEmpty(CGF);
1847 CGF.EmitBlock(ElseBlock);
1848 ElseGen(CGF);
1849 // There is no need to emit a line number for an unconditional branch.
1850 (void)ApplyDebugLocation::CreateEmpty(CGF);
1851 CGF.EmitBranch(ContBlock);
1852 // Emit the continuation block for code after the if.
1853 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1854}
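// A sketch of the control flow emitted when the condition does not fold
// (block names match the createBasicBlock calls above):
//   br i1 %cond, label %omp_if.then, label %omp_if.else
//   omp_if.then: <ThenGen> ; br label %omp_if.end
//   omp_if.else: <ElseGen> ; br label %omp_if.end
//   omp_if.end:  ...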
1855
1856void CGOpenMPRuntime::emitParallelCall(
1857 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1858 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1859 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1860 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1861 if (!CGF.HaveInsertPoint())
1862 return;
1863 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1864 auto &M = CGM.getModule();
1865 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1866 this](CodeGenFunction &CGF, PrePostActionTy &) {
1867 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1868 llvm::Value *Args[] = {
1869 RTLoc,
1870 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1871 OutlinedFn};
1872 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1873 RealArgs.append(std::begin(Args), std::end(Args));
1874 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1875
1876 llvm::FunctionCallee RTLFn =
1877 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1878 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1879 };
1880 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1881 this](CodeGenFunction &CGF, PrePostActionTy &) {
1882 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1883 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1884 // Build calls:
1885 // __kmpc_serialized_parallel(&Loc, GTid);
1886 llvm::Value *Args[] = {RTLoc, ThreadID};
1887 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1888 M, OMPRTL___kmpc_serialized_parallel),
1889 Args);
1890
1891 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1892 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1893 RawAddress ZeroAddrBound =
1894 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1895 /*Name=*/".bound.zero.addr");
1896 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1897 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1898 // The thread id for serialized parallel regions is 0.
1899 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1900 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1901 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1902
1903 // Ensure we do not inline the function. This is trivially true for the ones
1904 // passed to __kmpc_fork_call, but the ones called in serialized regions
1905 // could be inlined. This is not perfect, but it is closer to the invariant
1906 // we want, namely, every data environment starts with a new function.
1907 // TODO: We should pass the if condition to the runtime function and do the
1908 // handling there. Much cleaner code.
1909 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1910 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1911 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1912
1913 // __kmpc_end_serialized_parallel(&Loc, GTid);
1914 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1915 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1916 M, OMPRTL___kmpc_end_serialized_parallel),
1917 EndArgs);
1918 };
1919 if (IfCond) {
1920 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1921 } else {
1922 RegionCodeGenTy ThenRCG(ThenGen);
1923 ThenRCG(CGF);
1924 }
1925}
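// Taken together, '#pragma omp parallel if(cond)' lowers roughly to the
// following (a hedged sketch, ignoring num_threads/severity handling):
//   if (cond) {
//     __kmpc_fork_call(&loc, nvars, microtask, var1, ..., varn);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     microtask(&gtid, &.bound.zero.addr, var1, ..., varn);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }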
1926
1927// If we're inside an (outlined) parallel region, use the region info's
1928// thread-ID variable (it is passed as the first argument of the outlined
1929// function, "kmp_int32 *gtid"). Otherwise, in regular serial code, get the
1930// thread ID by calling kmp_int32 __kmpc_global_thread_num(ident_t *loc),
1931// stash that thread ID in a temporary, and return the address of the
1932// temporary.
1933Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1934 SourceLocation Loc) {
1935 if (auto *OMPRegionInfo =
1936 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1937 if (OMPRegionInfo->getThreadIDVariable())
1938 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1939
1940 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1941 QualType Int32Ty =
1942 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1943 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1944 CGF.EmitStoreOfScalar(ThreadID,
1945 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1946
1947 return ThreadIDTemp;
1948}
1949
1950llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1951 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1952 std::string Name = getName({Prefix, "var"});
1953 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1954}
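// For example, '#pragma omp critical (foo)' yields an internal global of
// type kmp_critical_name spelled roughly '.gomp_critical_user_foo.var'
// (the exact name comes from getName), shared by every use of the same
// critical name in the module.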
1955
1956namespace {
1957/// Common pre(post)-action for different OpenMP constructs.
1958class CommonActionTy final : public PrePostActionTy {
1959 llvm::FunctionCallee EnterCallee;
1960 ArrayRef<llvm::Value *> EnterArgs;
1961 llvm::FunctionCallee ExitCallee;
1962 ArrayRef<llvm::Value *> ExitArgs;
1963 bool Conditional;
1964 llvm::BasicBlock *ContBlock = nullptr;
1965
1966public:
1967 CommonActionTy(llvm::FunctionCallee EnterCallee,
1968 ArrayRef<llvm::Value *> EnterArgs,
1969 llvm::FunctionCallee ExitCallee,
1970 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1971 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1972 ExitArgs(ExitArgs), Conditional(Conditional) {}
1973 void Enter(CodeGenFunction &CGF) override {
1974 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1975 if (Conditional) {
1976 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1977 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1978 ContBlock = CGF.createBasicBlock("omp_if.end");
1979 // Generate the branch (If-stmt)
1980 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1981 CGF.EmitBlock(ThenBlock);
1982 }
1983 }
1984 void Done(CodeGenFunction &CGF) {
1985 // Emit the rest of blocks/branches
1986 CGF.EmitBranch(ContBlock);
1987 CGF.EmitBlock(ContBlock, true);
1988 }
1989 void Exit(CodeGenFunction &CGF) override {
1990 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1991 }
1992};
1993} // anonymous namespace
1994
1995void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1996 StringRef CriticalName,
1997 const RegionCodeGenTy &CriticalOpGen,
1998 SourceLocation Loc, const Expr *Hint) {
1999 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2000 // CriticalOpGen();
2001 // __kmpc_end_critical(ident_t *, gtid, Lock);
2002 // Prepare arguments and build a call to __kmpc_critical
2003 if (!CGF.HaveInsertPoint())
2004 return;
2005 llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
2006 CGM.getModule(),
2007 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
2008 llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
2009 unsigned LockVarArgIdx = 2;
2010 if (cast<llvm::GlobalVariable>(LockVar)->getAddressSpace() !=
2011 RuntimeFcn.getFunctionType()
2012 ->getParamType(LockVarArgIdx)
2013 ->getPointerAddressSpace())
2014 LockVar = CGF.Builder.CreateAddrSpaceCast(
2015 LockVar, RuntimeFcn.getFunctionType()->getParamType(LockVarArgIdx));
2016 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2017 LockVar};
2018 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2019 std::end(Args));
2020 if (Hint) {
2021 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2022 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2023 }
2024 CommonActionTy Action(RuntimeFcn, EnterArgs,
2025 OMPBuilder.getOrCreateRuntimeFunction(
2026 CGM.getModule(), OMPRTL___kmpc_end_critical),
2027 Args);
2028 CriticalOpGen.setAction(Action);
2029 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2030}
2031
2032void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2033 const RegionCodeGenTy &MasterOpGen,
2034 SourceLocation Loc) {
2035 if (!CGF.HaveInsertPoint())
2036 return;
2037 // if(__kmpc_master(ident_t *, gtid)) {
2038 // MasterOpGen();
2039 // __kmpc_end_master(ident_t *, gtid);
2040 // }
2041 // Prepare arguments and build a call to __kmpc_master
2042 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2043 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2044 CGM.getModule(), OMPRTL___kmpc_master),
2045 Args,
2046 OMPBuilder.getOrCreateRuntimeFunction(
2047 CGM.getModule(), OMPRTL___kmpc_end_master),
2048 Args,
2049 /*Conditional=*/true);
2050 MasterOpGen.setAction(Action);
2051 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2052 Action.Done(CGF);
2053}
2054
2055void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2056 const RegionCodeGenTy &MaskedOpGen,
2057 SourceLocation Loc, const Expr *Filter) {
2058 if (!CGF.HaveInsertPoint())
2059 return;
2060 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2061 // MaskedOpGen();
2062 // __kmpc_end_masked(ident_t *, gtid);
2063 // }
2064 // Prepare arguments and build a call to __kmpc_masked
2065 llvm::Value *FilterVal = Filter
2066 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2067 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2068 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2069 FilterVal};
2070 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2071 getThreadID(CGF, Loc)};
2072 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2073 CGM.getModule(), OMPRTL___kmpc_masked),
2074 Args,
2075 OMPBuilder.getOrCreateRuntimeFunction(
2076 CGM.getModule(), OMPRTL___kmpc_end_masked),
2077 ArgsEnd,
2078 /*Conditional=*/true);
2079 MaskedOpGen.setAction(Action);
2080 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2081 Action.Done(CGF);
2082}
2083
2084void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2085 SourceLocation Loc) {
2086 if (!CGF.HaveInsertPoint())
2087 return;
2088 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2089 OMPBuilder.createTaskyield(CGF.Builder);
2090 } else {
2091 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2092 llvm::Value *Args[] = {
2093 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2094 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2095 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2096 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2097 Args);
2098 }
2099
2100 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2101 Region->emitUntiedSwitch(CGF);
2102}
2103
2104void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2105 const RegionCodeGenTy &TaskgroupOpGen,
2106 SourceLocation Loc) {
2107 if (!CGF.HaveInsertPoint())
2108 return;
2109 // __kmpc_taskgroup(ident_t *, gtid);
2110 // TaskgroupOpGen();
2111 // __kmpc_end_taskgroup(ident_t *, gtid);
2112 // Prepare arguments and build a call to __kmpc_taskgroup
2113 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2114 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2115 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2116 Args,
2117 OMPBuilder.getOrCreateRuntimeFunction(
2118 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2119 Args);
2120 TaskgroupOpGen.setAction(Action);
2121 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2122}
2123
2124/// Given an array of pointers to variables, project the address of a
2125/// given variable.
2126static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2127 unsigned Index, const VarDecl *Var) {
2128 // Pull out the pointer to the variable.
2129 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2130 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2131
2132 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2133 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2134}
2135
2136static llvm::Value *emitCopyprivateCopyFunction(
2137 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2138 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2139 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2140 SourceLocation Loc) {
2141 ASTContext &C = CGM.getContext();
2142 // void copy_func(void *LHSArg, void *RHSArg);
2143 FunctionArgList Args;
2144 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2145 ImplicitParamKind::Other);
2146 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2147 ImplicitParamKind::Other);
2148 Args.push_back(&LHSArg);
2149 Args.push_back(&RHSArg);
2150 const auto &CGFI =
2151 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2152 std::string Name =
2153 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2154 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2155 llvm::GlobalValue::InternalLinkage, Name,
2156 &CGM.getModule());
2157 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2158 Fn->setDoesNotRecurse();
2159 CodeGenFunction CGF(CGM);
2160 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2161 // Dest = (void*[n])(LHSArg);
2162 // Src = (void*[n])(RHSArg);
2163 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2164 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2165 CGF.Builder.getPtrTy(0)),
2166 ArgsElemType, CGF.getPointerAlign());
2167 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2168 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2169 CGF.Builder.getPtrTy(0)),
2170 ArgsElemType, CGF.getPointerAlign());
2171 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2172 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2173 // ...
2174 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2175 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2176 const auto *DestVar =
2177 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2178 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2179
2180 const auto *SrcVar =
2181 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2182 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2183
2184 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2185 QualType Type = VD->getType();
2186 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2187 }
2188 CGF.FinishFunction();
2189 return Fn;
2190}
2191
2192void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2193 const RegionCodeGenTy &SingleOpGen,
2194 SourceLocation Loc,
2195 ArrayRef<const Expr *> CopyprivateVars,
2196 ArrayRef<const Expr *> SrcExprs,
2197 ArrayRef<const Expr *> DstExprs,
2198 ArrayRef<const Expr *> AssignmentOps) {
2199 if (!CGF.HaveInsertPoint())
2200 return;
2201 assert(CopyprivateVars.size() == SrcExprs.size() &&
2202 CopyprivateVars.size() == DstExprs.size() &&
2203 CopyprivateVars.size() == AssignmentOps.size());
2204 ASTContext &C = CGM.getContext();
2205 // int32 did_it = 0;
2206 // if(__kmpc_single(ident_t *, gtid)) {
2207 // SingleOpGen();
2208 // __kmpc_end_single(ident_t *, gtid);
2209 // did_it = 1;
2210 // }
2211 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2212 // <copy_func>, did_it);
2213
2214 Address DidIt = Address::invalid();
2215 if (!CopyprivateVars.empty()) {
2216 // int32 did_it = 0;
2217 QualType KmpInt32Ty =
2218 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2219 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2220 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2221 }
2222 // Prepare arguments and build a call to __kmpc_single
2223 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2224 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2225 CGM.getModule(), OMPRTL___kmpc_single),
2226 Args,
2227 OMPBuilder.getOrCreateRuntimeFunction(
2228 CGM.getModule(), OMPRTL___kmpc_end_single),
2229 Args,
2230 /*Conditional=*/true);
2231 SingleOpGen.setAction(Action);
2232 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2233 if (DidIt.isValid()) {
2234 // did_it = 1;
2235 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2236 }
2237 Action.Done(CGF);
2238 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2239 // <copy_func>, did_it);
2240 if (DidIt.isValid()) {
2241 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2242 QualType CopyprivateArrayTy = C.getConstantArrayType(
2243 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2244 /*IndexTypeQuals=*/0);
2245 // Create a list of all private variables for copyprivate.
2246 Address CopyprivateList =
2247 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2248 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2249 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2250 CGF.Builder.CreateStore(
2251 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2252 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2253 CGF.VoidPtrTy),
2254 Elem);
2255 }
2256 // Build a function that copies private values from the single region to
2257 // all other threads in the corresponding parallel region.
2258 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2259 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2260 SrcExprs, DstExprs, AssignmentOps, Loc);
2261 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2262 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2263 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2264 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2265 llvm::Value *Args[] = {
2266 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2267 getThreadID(CGF, Loc), // i32 <gtid>
2268 BufSize, // size_t <buf_size>
2269 CL.emitRawPointer(CGF), // void *<copyprivate list>
2270 CpyFn, // void (*) (void *, void *) <copy_func>
2271 DidItVal // i32 did_it
2272 };
2273 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2274 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2275 Args);
2276 }
2277}
2278
2279void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2280 const RegionCodeGenTy &OrderedOpGen,
2281 SourceLocation Loc, bool IsThreads) {
2282 if (!CGF.HaveInsertPoint())
2283 return;
2284 // __kmpc_ordered(ident_t *, gtid);
2285 // OrderedOpGen();
2286 // __kmpc_end_ordered(ident_t *, gtid);
2287 // Prepare arguments and build a call to __kmpc_ordered
2288 if (IsThreads) {
2289 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2290 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2291 CGM.getModule(), OMPRTL___kmpc_ordered),
2292 Args,
2293 OMPBuilder.getOrCreateRuntimeFunction(
2294 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2295 Args);
2296 OrderedOpGen.setAction(Action);
2297 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2298 return;
2299 }
2300 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2301}
2302
2303static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2304 unsigned Flags;
2305 if (Kind == OMPD_for)
2306 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2307 else if (Kind == OMPD_sections)
2308 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2309 else if (Kind == OMPD_single)
2310 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2311 else if (Kind == OMPD_barrier)
2312 Flags = OMP_IDENT_BARRIER_EXPL;
2313 else
2314 Flags = OMP_IDENT_BARRIER_IMPL;
2315 return Flags;
2316}
2317
2318void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2319 CodeGenFunction &CGF, const OMPLoopDirective &S,
2320 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2321 // Check if the loop directive is actually a doacross loop directive. In
2322 // this case choose a static schedule with chunk size 1 (schedule(static, 1)).
2323 if (llvm::any_of(
2324 S.getClausesOfKind<OMPOrderedClause>(),
2325 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2326 ScheduleKind = OMPC_SCHEDULE_static;
2327 // Chunk size is 1 in this case.
2328 llvm::APInt ChunkSize(32, 1);
2329 ChunkExpr = IntegerLiteral::Create(
2330 CGF.getContext(), ChunkSize,
2331 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2332 SourceLocation());
2333 }
2334}
2335
2336void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2337 OpenMPDirectiveKind Kind, bool EmitChecks,
2338 bool ForceSimpleCall) {
2339 // Check if we should use the OMPBuilder
2340 auto *OMPRegionInfo =
2341 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2342 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2343 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2344 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2345 EmitChecks));
2346 CGF.Builder.restoreIP(AfterIP);
2347 return;
2348 }
2349
2350 if (!CGF.HaveInsertPoint())
2351 return;
2352 // Build a call to __kmpc_cancel_barrier(loc, thread_id) or
2353 // __kmpc_barrier(loc, thread_id).
2354 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2357 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2358 getThreadID(CGF, Loc)};
2359 if (OMPRegionInfo) {
2360 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2361 llvm::Value *Result = CGF.EmitRuntimeCall(
2362 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2363 OMPRTL___kmpc_cancel_barrier),
2364 Args);
2365 if (EmitChecks) {
2366 // if (__kmpc_cancel_barrier()) {
2367 // exit from construct;
2368 // }
2369 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2370 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2371 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2372 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2373 CGF.EmitBlock(ExitBB);
2374 // exit from construct;
2375 CodeGenFunction::JumpDest CancelDestination =
2376 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2377 CGF.EmitBranchThroughCleanup(CancelDestination);
2378 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2379 }
2380 return;
2381 }
2382 }
2383 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2384 CGM.getModule(), OMPRTL___kmpc_barrier),
2385 Args);
2386}
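// A sketch of the cancellable path above (non-OMPBuilder lowering):
//   if (__kmpc_cancel_barrier(&loc, gtid)) {
//     // cancelled: exit the construct, branching through cleanups
//   }
// A plain, non-cancellable barrier is simply: __kmpc_barrier(&loc, gtid);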
2387
2388void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2389 Expr *ME, bool IsFatal) {
2390 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2391 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2392 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2393 // *message)
2394 llvm::Value *Args[] = {
2395 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2396 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2397 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2398 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2399 CGM.getModule(), OMPRTL___kmpc_error),
2400 Args);
2401}
2402
2403/// Map the OpenMP loop schedule to the runtime enumeration.
2404static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2405 bool Chunked, bool Ordered) {
2406 switch (ScheduleKind) {
2407 case OMPC_SCHEDULE_static:
2408 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2409 : (Ordered ? OMP_ord_static : OMP_sch_static);
2410 case OMPC_SCHEDULE_dynamic:
2411 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2412 case OMPC_SCHEDULE_guided:
2413 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2414 case OMPC_SCHEDULE_runtime:
2415 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2416 case OMPC_SCHEDULE_auto:
2417 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2418 case OMPC_SCHEDULE_unknown:
2419 assert(!Chunked && "chunk was specified but schedule kind not known");
2420 return Ordered ? OMP_ord_static : OMP_sch_static;
2421 }
2422 llvm_unreachable("Unexpected runtime schedule");
2423}
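// For example, 'schedule(static, 4)' maps to OMP_sch_static_chunked,
// 'schedule(static)' to OMP_sch_static, and 'schedule(guided)' under an
// 'ordered' clause to OMP_ord_guided_chunked.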
2424
2425/// Map the OpenMP distribute schedule to the runtime enumeration.
2426static OpenMPSchedType
2427getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2428 // Only 'static' is allowed for dist_schedule.
2429 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2430}
2431
2432bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2433 bool Chunked) const {
2434 OpenMPSchedType Schedule =
2435 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2436 return Schedule == OMP_sch_static;
2437}
2438
2439bool CGOpenMPRuntime::isStaticNonchunked(
2440 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2441 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2442 return Schedule == OMP_dist_sch_static;
2443}
2444
2445bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2446 bool Chunked) const {
2447 OpenMPSchedType Schedule =
2448 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2449 return Schedule == OMP_sch_static_chunked;
2450}
2451
2452bool CGOpenMPRuntime::isStaticChunked(
2453 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2454 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2455 return Schedule == OMP_dist_sch_static_chunked;
2456}
2457
2458bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2459 OpenMPSchedType Schedule =
2460 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2461 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2462 return Schedule != OMP_sch_static;
2463}
2464
2465static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2466 OpenMPScheduleClauseModifier M1,
2467 OpenMPScheduleClauseModifier M2) {
2468 int Modifier = 0;
2469 switch (M1) {
2470 case OMPC_SCHEDULE_MODIFIER_monotonic:
2471 Modifier = OMP_sch_modifier_monotonic;
2472 break;
2473 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2474 Modifier = OMP_sch_modifier_nonmonotonic;
2475 break;
2476 case OMPC_SCHEDULE_MODIFIER_simd:
2477 if (Schedule == OMP_sch_static_chunked)
2478 Schedule = OMP_sch_static_balanced_chunked;
2479 break;
2480 case OMPC_SCHEDULE_MODIFIER_last:
2481 case OMPC_SCHEDULE_MODIFIER_unknown:
2482 break;
2483 }
2484 switch (M2) {
2485 case OMPC_SCHEDULE_MODIFIER_monotonic:
2486 Modifier = OMP_sch_modifier_monotonic;
2487 break;
2488 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2489 Modifier = OMP_sch_modifier_nonmonotonic;
2490 break;
2491 case OMPC_SCHEDULE_MODIFIER_simd:
2492 if (Schedule == OMP_sch_static_chunked)
2493 Schedule = OMP_sch_static_balanced_chunked;
2494 break;
2495 case OMPC_SCHEDULE_MODIFIER_last:
2496 case OMPC_SCHEDULE_MODIFIER_unknown:
2497 break;
2498 }
2499 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2500 // If the static schedule kind is specified or if the ordered clause is
2501 // specified, and if the nonmonotonic modifier is not specified, the effect is
2502 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2503 // modifier is specified, the effect is as if the nonmonotonic modifier is
2504 // specified.
2505 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2506 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2507 Schedule == OMP_sch_static_balanced_chunked ||
2508 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2509 Schedule == OMP_dist_sch_static_chunked ||
2510 Schedule == OMP_dist_sch_static))
2511 Modifier = OMP_sch_modifier_nonmonotonic;
2512 }
2513 return Schedule | Modifier;
2514}
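// For example, with OpenMP 5.0 or later a plain 'schedule(dynamic)' yields
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, while
// 'schedule(static)' is left without a modifier bit (treated as monotonic).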
2515
2516void CGOpenMPRuntime::emitForDispatchInit(
2517 CodeGenFunction &CGF, SourceLocation Loc,
2518 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2519 bool Ordered, const DispatchRTInput &DispatchValues) {
2520 if (!CGF.HaveInsertPoint())
2521 return;
2522 OpenMPSchedType Schedule = getRuntimeSchedule(
2523 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2524 assert(Ordered ||
2525 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2526 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2527 Schedule != OMP_sch_static_balanced_chunked));
2528 // Call __kmpc_dispatch_init(
2529 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2530 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2531 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2532
2533 // If the chunk was not specified in the clause, use the default value 1.
2534 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2535 : CGF.Builder.getIntN(IVSize, 1);
2536 llvm::Value *Args[] = {
2537 emitUpdateLocation(CGF, Loc),
2538 getThreadID(CGF, Loc),
2539 CGF.Builder.getInt32(addMonoNonMonoModifier(
2540 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2541 DispatchValues.LB, // Lower
2542 DispatchValues.UB, // Upper
2543 CGF.Builder.getIntN(IVSize, 1), // Stride
2544 Chunk // Chunk
2545 };
2546 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2547 Args);
2548}
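// For instance, 'schedule(dynamic, 8)' on a signed 32-bit IV lowers roughly
// to the following (a sketch; 'sched' carries the modifier bits computed by
// addMonoNonMonoModifier):
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, /*stride=*/1,
//                          /*chunk=*/8);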
2549
2550void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2551 SourceLocation Loc) {
2552 if (!CGF.HaveInsertPoint())
2553 return;
2554 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2555 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2556 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2557}
2558
2559static void emitForStaticInitCall(
2560 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2561 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2562 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2563 const CGOpenMPRuntime::StaticRTInput &Values) {
2564 if (!CGF.HaveInsertPoint())
2565 return;
2566
2567 assert(!Values.Ordered);
2568 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2569 Schedule == OMP_sch_static_balanced_chunked ||
2570 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2571 Schedule == OMP_dist_sch_static ||
2572 Schedule == OMP_dist_sch_static_chunked);
2573
2574 // Call __kmpc_for_static_init(
2575 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2576 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2577 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2578 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2579 llvm::Value *Chunk = Values.Chunk;
2580 if (Chunk == nullptr) {
2581 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2582 Schedule == OMP_dist_sch_static) &&
2583 "expected static non-chunked schedule");
2584 // If the chunk was not specified in the clause, use the default value 1.
2585 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2586 } else {
2587 assert((Schedule == OMP_sch_static_chunked ||
2588 Schedule == OMP_sch_static_balanced_chunked ||
2589 Schedule == OMP_ord_static_chunked ||
2590 Schedule == OMP_dist_sch_static_chunked) &&
2591 "expected static chunked schedule");
2592 }
2593 llvm::Value *Args[] = {
2594 UpdateLocation,
2595 ThreadId,
2596 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2597 M2)), // Schedule type
2598 Values.IL.emitRawPointer(CGF), // &isLastIter
2599 Values.LB.emitRawPointer(CGF), // &LB
2600 Values.UB.emitRawPointer(CGF), // &UB
2601 Values.ST.emitRawPointer(CGF), // &Stride
2602 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2603 Chunk // Chunk
2604 };
2605 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2606}
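// A hedged sketch of the resulting call for a signed 32-bit IV:
//   __kmpc_for_static_init_4(&loc, gtid, sched, &.omp.is_last, &.omp.lb,
//                            &.omp.ub, &.omp.stride, /*incr=*/1, chunk);
// The runtime rewrites the bounds and stride to this thread's chunk of the
// iteration space.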
2607
2608void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2609 SourceLocation Loc,
2610 OpenMPDirectiveKind DKind,
2611 const OpenMPScheduleTy &ScheduleKind,
2612 const StaticRTInput &Values) {
2613 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2614 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2615 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2616 "Expected loop-based or sections-based directive.");
2617 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2618 isOpenMPLoopDirective(DKind)
2619 ? OMP_IDENT_WORK_LOOP
2620 : OMP_IDENT_WORK_SECTIONS);
2621 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2622 llvm::FunctionCallee StaticInitFunction =
2623 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2624 false);
2625 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2626 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2627 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2628}
2629
2630void CGOpenMPRuntime::emitDistributeStaticInit(
2631 CodeGenFunction &CGF, SourceLocation Loc,
2632 OpenMPDistScheduleClauseKind SchedKind,
2633 const CGOpenMPRuntime::StaticRTInput &Values) {
2634 OpenMPSchedType ScheduleNum =
2635 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2636 llvm::Value *UpdatedLocation =
2637 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2638 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2639 llvm::FunctionCallee StaticInitFunction;
2640 bool isGPUDistribute =
2641 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2642 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2643 Values.IVSize, Values.IVSigned, isGPUDistribute);
2644
2645 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2646 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2647 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2648}
2649
2650void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2651 SourceLocation Loc,
2652 OpenMPDirectiveKind DKind) {
2653 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2654 DKind == OMPD_sections) &&
2655 "Expected distribute, for, or sections directive kind");
2656 if (!CGF.HaveInsertPoint())
2657 return;
2658 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2659 llvm::Value *Args[] = {
2660 emitUpdateLocation(CGF, Loc,
2661 isOpenMPDistributeDirective(DKind) ||
2662 (DKind == OMPD_target_teams_loop)
2663 ? OMP_IDENT_WORK_DISTRIBUTE
2664 : isOpenMPLoopDirective(DKind)
2665 ? OMP_IDENT_WORK_LOOP
2666 : OMP_IDENT_WORK_SECTIONS),
2667 getThreadID(CGF, Loc)};
2668 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2669 if (isOpenMPDistributeDirective(DKind) &&
2670 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2671 CGF.EmitRuntimeCall(
2672 OMPBuilder.getOrCreateRuntimeFunction(
2673 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2674 Args);
2675 else
2676 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2677 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2678 Args);
2679}
2680
2681void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2682 SourceLocation Loc,
2683 unsigned IVSize,
2684 bool IVSigned) {
2685 if (!CGF.HaveInsertPoint())
2686 return;
2687 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2688 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2689 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2690 Args);
2691}
2692
2693llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2694 SourceLocation Loc, unsigned IVSize,
2695 bool IVSigned, Address IL,
2696 Address LB, Address UB,
2697 Address ST) {
2698 // Call __kmpc_dispatch_next(
2699 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2700 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2701 // kmp_int[32|64] *p_stride);
2702 llvm::Value *Args[] = {
2703 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2704 IL.emitRawPointer(CGF), // &isLastIter
2705 LB.emitRawPointer(CGF), // &Lower
2706 UB.emitRawPointer(CGF), // &Upper
2707 ST.emitRawPointer(CGF) // &Stride
2708 };
2709 llvm::Value *Call = CGF.EmitRuntimeCall(
2710 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2711 return CGF.EmitScalarConversion(
2712 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2713 CGF.getContext().BoolTy, Loc);
2714}
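// Callers typically drive a dynamically scheduled loop with this helper,
// conceptually (sketch for a signed 32-bit IV):
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
//     for (i = lb; i <= ub; i += st)
//       <loop body>;
//   }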
2715
2716llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2717 const Expr *Message,
2718 SourceLocation Loc) {
2719 if (!Message)
2720 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2721 return CGF.EmitScalarExpr(Message);
2722}
2723
2724llvm::Value *
2725CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity,
2726 SourceLocation Loc) {
2727 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2728 // as if sev-level is fatal."
2729 return llvm::ConstantInt::get(CGM.Int32Ty,
2730 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2731}
2732
2733void CGOpenMPRuntime::emitNumThreadsClause(
2734 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2735 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
2736 SourceLocation SeverityLoc, const Expr *Message,
2737 SourceLocation MessageLoc) {
2738 if (!CGF.HaveInsertPoint())
2739 return;
2740 llvm::SmallVector<llvm::Value *, 4> Args(
2741 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2742 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2743 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads),
2744 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2745 // message) if the strict modifier is used.
2746 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2747 if (Modifier == OMPC_NUMTHREADS_strict) {
2748 FnID = OMPRTL___kmpc_push_num_threads_strict;
2749 Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2750 Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
2751 }
2752 CGF.EmitRuntimeCall(
2753 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2754}
2755
2756void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2757 ProcBindKind ProcBind,
2758 SourceLocation Loc) {
2759 if (!CGF.HaveInsertPoint())
2760 return;
2761 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2762 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2763 llvm::Value *Args[] = {
2764 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2765 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2766 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2767 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2768 Args);
2769}
2770
2771void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2772 SourceLocation Loc, llvm::AtomicOrdering AO) {
2773 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2774 OMPBuilder.createFlush(CGF.Builder);
2775 } else {
2776 if (!CGF.HaveInsertPoint())
2777 return;
2778 // Build call void __kmpc_flush(ident_t *loc)
2779 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2780 CGM.getModule(), OMPRTL___kmpc_flush),
2781 emitUpdateLocation(CGF, Loc));
2782 }
2783}
2784
2785namespace {
2786/// Indexes of fields for type kmp_task_t.
2787enum KmpTaskTFields {
2788 /// List of shared variables.
2789 KmpTaskTShareds,
2790 /// Task routine.
2791 KmpTaskTRoutine,
2792 /// Partition id for the untied tasks.
2793 KmpTaskTPartId,
2794 /// Function with call of destructors for private variables.
2795 Data1,
2796 /// Task priority.
2797 Data2,
2798 /// (Taskloops only) Lower bound.
2799 KmpTaskTLowerBound,
2800 /// (Taskloops only) Upper bound.
2801 KmpTaskTUpperBound,
2802 /// (Taskloops only) Stride.
2803 KmpTaskTStride,
2804 /// (Taskloops only) Is last iteration flag.
2805 KmpTaskTLastIter,
2806 /// (Taskloops only) Reduction data.
2807 KmpTaskTReductions,
2808};
2809} // anonymous namespace
2810
2811void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2812 // If we are in simd mode or there are no entries, we don't need to do
2813 // anything.
2814 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2815 return;
2816
2817 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2818 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2819 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2820 SourceLocation Loc;
2821 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2822 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2823 E = CGM.getContext().getSourceManager().fileinfo_end();
2824 I != E; ++I) {
2825 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2826 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2827 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2828 I->getFirst(), EntryInfo.Line, 1);
2829 break;
2830 }
2831 }
2832 }
2833 switch (Kind) {
2834 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2835 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2836 DiagnosticsEngine::Error, "Offloading entry for target region in "
2837 "%0 is incorrect: either the "
2838 "address or the ID is invalid.");
2839 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2840 } break;
2841 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2842 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2843 DiagnosticsEngine::Error, "Offloading entry for declare target "
2844 "variable %0 is incorrect: the "
2845 "address is invalid.");
2846 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2847 } break;
2848 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2849 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2851 "Offloading entry for declare target variable is incorrect: the "
2852 "address is invalid.");
2853 CGM.getDiags().Report(DiagID);
2854 } break;
2855 }
2856 };
2857
2858 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2859}
2860
2861void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2862 if (!KmpRoutineEntryPtrTy) {
2863 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2864 ASTContext &C = CGM.getContext();
2865 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2866 FunctionProtoType::ExtProtoInfo EPI;
2867 KmpRoutineEntryPtrQTy = C.getPointerType(
2868 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2869 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2870 }
2871}
2872
2873namespace {
2874struct PrivateHelpersTy {
2875 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2876 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2877 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2878 PrivateElemInit(PrivateElemInit) {}
2879 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2880 const Expr *OriginalRef = nullptr;
2881 const VarDecl *Original = nullptr;
2882 const VarDecl *PrivateCopy = nullptr;
2883 const VarDecl *PrivateElemInit = nullptr;
2884 bool isLocalPrivate() const {
2885 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2886 }
2887};
2888typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2889} // anonymous namespace
2890
2891static bool isAllocatableDecl(const VarDecl *VD) {
2892 const VarDecl *CVD = VD->getCanonicalDecl();
2893 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2894 return false;
2895 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2896 // Use the default allocation.
2897 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2898 !AA->getAllocator());
2899}
2900
2901static RecordDecl *
2902createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2903 if (!Privates.empty()) {
2904 ASTContext &C = CGM.getContext();
2905 // Build struct .kmp_privates_t. {
2906 // /* private vars */
2907 // };
2908 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2909 RD->startDefinition();
2910 for (const auto &Pair : Privates) {
2911 const VarDecl *VD = Pair.second.Original;
2912 QualType Type = VD->getType().getNonReferenceType();
2913 // If the private variable is a local variable with lvalue ref type,
2914 // allocate the pointer instead of the pointee type.
2915 if (Pair.second.isLocalPrivate()) {
2916 if (VD->getType()->isLValueReferenceType())
2917 Type = C.getPointerType(Type);
2918 if (isAllocatableDecl(VD))
2919 Type = C.getPointerType(Type);
2920 }
2921 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2922 if (VD->hasAttrs()) {
2923 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2924 E(VD->getAttrs().end());
2925 I != E; ++I)
2926 FD->addAttr(*I);
2927 }
2928 }
2929 RD->completeDefinition();
2930 return RD;
2931 }
2932 return nullptr;
2933}
2934
2935static RecordDecl *
2936createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2937 QualType KmpInt32Ty,
2938 QualType KmpRoutineEntryPointerQTy) {
2939 ASTContext &C = CGM.getContext();
2940 // Build struct kmp_task_t {
2941 // void * shareds;
2942 // kmp_routine_entry_t routine;
2943 // kmp_int32 part_id;
2944 // kmp_cmplrdata_t data1;
2945 // kmp_cmplrdata_t data2;
2946 // For taskloops additional fields:
2947 // kmp_uint64 lb;
2948 // kmp_uint64 ub;
2949 // kmp_int64 st;
2950 // kmp_int32 liter;
2951 // void * reductions;
2952 // };
2953 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2954 UD->startDefinition();
2955 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2956 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2957 UD->completeDefinition();
2958 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
2959 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2960 RD->startDefinition();
2961 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2962 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2963 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2964 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2965 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2966 if (isOpenMPTaskLoopDirective(Kind)) {
2967 QualType KmpUInt64Ty =
2968 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2969 QualType KmpInt64Ty =
2970 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2971 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2972 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2973 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2974 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2975 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2976 }
2977 RD->completeDefinition();
2978 return RD;
2979}
2980
2981static RecordDecl *
2982createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2983 ArrayRef<PrivateDataTy> Privates) {
2984 ASTContext &C = CGM.getContext();
2985 // Build struct kmp_task_t_with_privates {
2986 // kmp_task_t task_data;
2987 // .kmp_privates_t. privates;
2988 // };
2989 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2990 RD->startDefinition();
2991 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2992 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2993 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
2994 RD->completeDefinition();
2995 return RD;
2996}
2997
2998/// Emit a proxy function which accepts kmp_task_t as the second
2999/// argument.
3000/// \code
3001/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3002/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3003/// For taskloops:
3004/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3005/// tt->reductions, tt->shareds);
3006/// return 0;
3007/// }
3008/// \endcode
3009static llvm::Function *
3011 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3012 QualType KmpTaskTWithPrivatesPtrQTy,
3013 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3014 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3015 llvm::Value *TaskPrivatesMap) {
3016 ASTContext &C = CGM.getContext();
3017 FunctionArgList Args;
3018 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3019 ImplicitParamKind::Other);
3020 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3021 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3022 ImplicitParamKind::Other);
3023 Args.push_back(&GtidArg);
3024 Args.push_back(&TaskTypeArg);
3025 const auto &TaskEntryFnInfo =
3026 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3027 llvm::FunctionType *TaskEntryTy =
3028 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3029 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3030 auto *TaskEntry = llvm::Function::Create(
3031 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3032 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3033 TaskEntry->setDoesNotRecurse();
3034 CodeGenFunction CGF(CGM);
3035 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3036 Loc, Loc);
3037
3038 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3039 // tt,
3040 // For taskloops:
3041 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3042 // tt->task_data.shareds);
3043 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3044 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3045 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3046 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3047 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3048 const auto *KmpTaskTWithPrivatesQTyRD =
3049 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3050 LValue Base =
3051 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3052 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3053 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3054 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3055 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3056
3057 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3058 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3059 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3060 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3061 CGF.ConvertTypeForMem(SharedsPtrTy));
3062
3063 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3064 llvm::Value *PrivatesParam;
3065 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3066 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3067 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3068 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3069 } else {
3070 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3071 }
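// If the task carries no privates record, a null pointer is passed instead so
// the outlined task function is always called with a uniform signature.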
3072
3073 llvm::Value *CommonArgs[] = {
3074 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3075 CGF.Builder
3076 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3077 CGF.VoidPtrTy, CGF.Int8Ty)
3078 .emitRawPointer(CGF)};
3079 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3080 std::end(CommonArgs));
3081 if (isOpenMPTaskLoopDirective(Kind)) {
3082 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3083 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3084 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3085 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3086 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3087 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3088 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3089 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3090 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3091 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3092 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3093 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3094 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3095 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3096 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3097 CallArgs.push_back(LBParam);
3098 CallArgs.push_back(UBParam);
3099 CallArgs.push_back(StParam);
3100 CallArgs.push_back(LIParam);
3101 CallArgs.push_back(RParam);
3102 }
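// Taskloop entries thus receive five extra arguments (lower bound, upper
// bound, stride, last-iteration flag, and reductions), all loaded from the
// kmp_task_t descriptor, ahead of the trailing shareds pointer.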
3103 CallArgs.push_back(SharedsParam);
3104
3105 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3106 CallArgs);
3107 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3108 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3109 CGF.FinishFunction();
3110 return TaskEntry;
3111}
3112
3113static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3114 SourceLocation Loc,
3115 QualType KmpInt32Ty,
3116 QualType KmpTaskTWithPrivatesPtrQTy,
3117 QualType KmpTaskTWithPrivatesQTy) {
3118 ASTContext &C = CGM.getContext();
3119 FunctionArgList Args;
3120 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3121 ImplicitParamKind::Other);
3122 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3123 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3124 ImplicitParamKind::Other);
3125 Args.push_back(&GtidArg);
3126 Args.push_back(&TaskTypeArg);
3127 const auto &DestructorFnInfo =
3128 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3129 llvm::FunctionType *DestructorFnTy =
3130 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3131 std::string Name =
3132 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3133 auto *DestructorFn =
3134 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3135 Name, &CGM.getModule());
3136 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3137 DestructorFnInfo);
3138 DestructorFn->setDoesNotRecurse();
3139 CodeGenFunction CGF(CGM);
3140 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3141 Args, Loc, Loc);
3142
3143 LValue Base = CGF.EmitLoadOfPointerLValue(
3144 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3145 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3146 const auto *KmpTaskTWithPrivatesQTyRD =
3147 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3148 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3149 Base = CGF.EmitLValueForField(Base, *FI);
3150 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3151 if (QualType::DestructionKind DtorKind =
3152 Field->getType().isDestructedType()) {
3153 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3154 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3155 }
3156 }
3157 CGF.FinishFunction();
3158 return DestructorFn;
3159}
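// Illustration: with a private of C++ class type 'S', the function emitted
// above conceptually performs 'tt->privates.<field>.~S();' for each privates
// field whose type reports a non-trivial destruction kind.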
3160
3161/// Emit a privates mapping function for correct handling of private and
3162/// firstprivate variables.
3163/// \code
3164/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3165/// **noalias priv1,..., <tyn> **noalias privn) {
3166/// *priv1 = &.privates.priv1;
3167/// ...;
3168/// *privn = &.privates.privn;
3169/// }
3170/// \endcode
3171static llvm::Value *
3172emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3173 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3174 ArrayRef<PrivateDataTy> Privates) {
3175 ASTContext &C = CGM.getContext();
3176 FunctionArgList Args;
3177 ImplicitParamDecl TaskPrivatesArg(
3178 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3179 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3180 ImplicitParamKind::Other);
3181 Args.push_back(&TaskPrivatesArg);
3182 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3183 unsigned Counter = 1;
3184 for (const Expr *E : Data.PrivateVars) {
3185 Args.push_back(ImplicitParamDecl::Create(
3186 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3187 C.getPointerType(C.getPointerType(E->getType()))
3188 .withConst()
3189 .withRestrict(),
3190 ImplicitParamKind::Other));
3191 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3192 PrivateVarsPos[VD] = Counter;
3193 ++Counter;
3194 }
3195 for (const Expr *E : Data.FirstprivateVars) {
3196 Args.push_back(ImplicitParamDecl::Create(
3197 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3198 C.getPointerType(C.getPointerType(E->getType()))
3199 .withConst()
3200 .withRestrict(),
3201 ImplicitParamKind::Other));
3202 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3203 PrivateVarsPos[VD] = Counter;
3204 ++Counter;
3205 }
3206 for (const Expr *E : Data.LastprivateVars) {
3207 Args.push_back(ImplicitParamDecl::Create(
3208 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3209 C.getPointerType(C.getPointerType(E->getType()))
3210 .withConst()
3211 .withRestrict(),
3212 ImplicitParamKind::Other));
3213 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3214 PrivateVarsPos[VD] = Counter;
3215 ++Counter;
3216 }
3217 for (const VarDecl *VD : Data.PrivateLocals) {
3218 QualType Ty = VD->getType().getNonReferenceType();
3219 if (VD->getType()->isLValueReferenceType())
3220 Ty = C.getPointerType(Ty);
3221 if (isAllocatableDecl(VD))
3222 Ty = C.getPointerType(Ty);
3223 Args.push_back(ImplicitParamDecl::Create(
3224 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3225 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3226 ImplicitParamKind::Other));
3227 PrivateVarsPos[VD] = Counter;
3228 ++Counter;
3229 }
3230 const auto &TaskPrivatesMapFnInfo =
3231 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3232 llvm::FunctionType *TaskPrivatesMapTy =
3233 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3234 std::string Name =
3235 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3236 auto *TaskPrivatesMap = llvm::Function::Create(
3237 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3238 &CGM.getModule());
3239 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3240 TaskPrivatesMapFnInfo);
3241 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3242 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3243 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3244 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3245 }
3246 CodeGenFunction CGF(CGM);
3247 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3248 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3249
3250 // *privi = &.privates.privi;
3251 LValue Base = CGF.EmitLoadOfPointerLValue(
3252 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3253 TaskPrivatesArg.getType()->castAs<PointerType>());
3254 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3255 Counter = 0;
3256 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3257 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3258 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3259 LValue RefLVal =
3260 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3261 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3262 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3263 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3264 ++Counter;
3265 }
3266 CGF.FinishFunction();
3267 return TaskPrivatesMap;
3268}
3269
3270/// Emit initialization for private variables in task-based directives.
3271static void emitPrivatesInit(CodeGenFunction &CGF,
3272 const OMPExecutableDirective &D,
3273 Address KmpTaskSharedsPtr, LValue TDBase,
3274 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3275 QualType SharedsTy, QualType SharedsPtrTy,
3276 const OMPTaskDataTy &Data,
3277 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3278 ASTContext &C = CGF.getContext();
3279 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3280 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3281 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3282 ? OMPD_taskloop
3283 : OMPD_task;
3284 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3285 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3286 LValue SrcBase;
3287 bool IsTargetTask =
3288 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3289 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3290 // For target-based directives, skip the 4 firstprivate arrays BasePointersArray,
3291 // PointersArray, SizesArray, and MappersArray. The original variables for
3292 // these arrays are not captured, and their addresses are obtained explicitly.
3293 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3294 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3295 SrcBase = CGF.MakeAddrLValue(
3296 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3297 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3298 CGF.ConvertTypeForMem(SharedsTy)),
3299 SharedsTy);
3300 }
3301 FI = FI->getType()->castAsRecordDecl()->field_begin();
3302 for (const PrivateDataTy &Pair : Privates) {
3303 // Do not initialize private locals.
3304 if (Pair.second.isLocalPrivate()) {
3305 ++FI;
3306 continue;
3307 }
3308 const VarDecl *VD = Pair.second.PrivateCopy;
3309 const Expr *Init = VD->getAnyInitializer();
3310 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3311 !CGF.isTrivialInitializer(Init)))) {
3312 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3313 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3314 const VarDecl *OriginalVD = Pair.second.Original;
3315 // Check if the variable is the target-based BasePointersArray,
3316 // PointersArray, SizesArray, or MappersArray.
3317 LValue SharedRefLValue;
3318 QualType Type = PrivateLValue.getType();
3319 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3320 if (IsTargetTask && !SharedField) {
3321 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3322 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3323 cast<CapturedDecl>(OriginalVD->getDeclContext())
3324 ->getNumParams() == 0 &&
3325 isa<TranslationUnitDecl>(
3326 cast<CapturedDecl>(OriginalVD->getDeclContext())
3327 ->getDeclContext()) &&
3328 "Expected artificial target data variable.");
3329 SharedRefLValue =
3330 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3331 } else if (ForDup) {
3332 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3333 SharedRefLValue = CGF.MakeAddrLValue(
3334 SharedRefLValue.getAddress().withAlignment(
3335 C.getDeclAlign(OriginalVD)),
3336 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3337 SharedRefLValue.getTBAAInfo());
3338 } else if (CGF.LambdaCaptureFields.count(
3339 Pair.second.Original->getCanonicalDecl()) > 0 ||
3340 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3341 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3342 } else {
3343 // Processing for implicitly captured variables.
3344 InlinedOpenMPRegionRAII Region(
3345 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3346 /*HasCancel=*/false, /*NoInheritance=*/true);
3347 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3348 }
3349 if (Type->isArrayType()) {
3350 // Initialize firstprivate array.
3351 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3352 // Perform simple memcpy.
3353 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3354 } else {
3355 // Initialize firstprivate array using element-by-element
3356 // initialization.
3357 CGF.EmitOMPAggregateAssign(
3358 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3359 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3360 Address SrcElement) {
3361 // Clean up any temporaries needed by the initialization.
3362 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3363 InitScope.addPrivate(Elem, SrcElement);
3364 (void)InitScope.Privatize();
3365 // Emit initialization for single element.
3366 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3367 CGF, &CapturesInfo);
3368 CGF.EmitAnyExprToMem(Init, DestElement,
3369 Init->getType().getQualifiers(),
3370 /*IsInitializer=*/false);
3371 });
3372 }
3373 } else {
3374 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3375 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3376 (void)InitScope.Privatize();
3377 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3378 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3379 /*capturedByInit=*/false);
3380 }
3381 } else {
3382 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3383 }
3384 }
3385 ++FI;
3386 }
3387}
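// Note: when ForDup is true this runs inside the taskloop task_dup routine, so
// firstprivate copies are re-initialized from the source task's shareds
// (SrcBase) rather than from the original variables.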
3388
3389/// Check if duplication function is required for taskloops.
3390static bool checkInitIsRequired(CodeGenFunction &CGF,
3391 ArrayRef<PrivateDataTy> Privates) {
3392 bool InitRequired = false;
3393 for (const PrivateDataTy &Pair : Privates) {
3394 if (Pair.second.isLocalPrivate())
3395 continue;
3396 const VarDecl *VD = Pair.second.PrivateCopy;
3397 const Expr *Init = VD->getAnyInitializer();
3398 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3399 !CGF.isTrivialInitializer(Init));
3400 if (InitRequired)
3401 break;
3402 }
3403 return InitRequired;
3404}
3405
3406
3407/// Emit task_dup function (for initialization of
3408/// private/firstprivate/lastprivate vars and last_iter flag)
3409/// \code
3410/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3411/// lastpriv) {
3412/// // setup lastprivate flag
3413/// task_dst->last = lastpriv;
3414/// // could be constructor calls here...
3415/// }
3416/// \endcode
3417static llvm::Value *
3418emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3419 const OMPExecutableDirective &D,
3420 QualType KmpTaskTWithPrivatesPtrQTy,
3421 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3422 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3423 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3424 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3425 ASTContext &C = CGM.getContext();
3426 FunctionArgList Args;
3427 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3428 KmpTaskTWithPrivatesPtrQTy,
3429 ImplicitParamKind::Other);
3430 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3431 KmpTaskTWithPrivatesPtrQTy,
3432 ImplicitParamKind::Other);
3433 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3434 ImplicitParamKind::Other);
3435 Args.push_back(&DstArg);
3436 Args.push_back(&SrcArg);
3437 Args.push_back(&LastprivArg);
3438 const auto &TaskDupFnInfo =
3439 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3440 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3441 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3442 auto *TaskDup = llvm::Function::Create(
3443 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3444 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3445 TaskDup->setDoesNotRecurse();
3446 CodeGenFunction CGF(CGM);
3447 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3448 Loc);
3449
3450 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3451 CGF.GetAddrOfLocalVar(&DstArg),
3452 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3453 // task_dst->liter = lastpriv;
3454 if (WithLastIter) {
3455 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3456 LValue Base = CGF.EmitLValueForField(
3457 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3458 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3459 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3460 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3461 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3462 }
3463
3464 // Emit initial values for private copies (if any).
3465 assert(!Privates.empty());
3466 Address KmpTaskSharedsPtr = Address::invalid();
3467 if (!Data.FirstprivateVars.empty()) {
3468 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3469 CGF.GetAddrOfLocalVar(&SrcArg),
3470 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3471 LValue Base = CGF.EmitLValueForField(
3472 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3473 KmpTaskSharedsPtr = Address(
3474 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3475 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3476 KmpTaskTShareds)),
3477 Loc),
3478 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3479 }
3480 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3481 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3482 CGF.FinishFunction();
3483 return TaskDup;
3484}
3485
3486/// Checks whether a destructor function needs to be generated.
3487/// \return true if cleanups are required, false otherwise.
3488static bool
3489checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3490 ArrayRef<PrivateDataTy> Privates) {
3491 for (const PrivateDataTy &P : Privates) {
3492 if (P.second.isLocalPrivate())
3493 continue;
3494 QualType Ty = P.second.Original->getType().getNonReferenceType();
3495 if (Ty.isDestructedType())
3496 return true;
3497 }
3498 return false;
3499}
3500
3501namespace {
3502/// Loop generator for OpenMP iterator expression.
3503class OMPIteratorGeneratorScope final
3504 : public CodeGenFunction::OMPPrivateScope {
3505 CodeGenFunction &CGF;
3506 const OMPIteratorExpr *E = nullptr;
3507 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3508 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3509 OMPIteratorGeneratorScope() = delete;
3510 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3511
3512public:
3513 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3514 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3515 if (!E)
3516 return;
3517 SmallVector<llvm::Value *, 4> Uppers;
3518 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3519 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3520 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3521 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3522 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3523 addPrivate(
3524 HelperData.CounterVD,
3525 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3526 }
3527 Privatize();
3528
3529 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3530 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3531 LValue CLVal =
3532 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3533 HelperData.CounterVD->getType());
3534 // Counter = 0;
3535 CGF.EmitStoreOfScalar(
3536 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3537 CLVal);
3538 CodeGenFunction::JumpDest &ContDest =
3539 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3540 CodeGenFunction::JumpDest &ExitDest =
3541 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3542 // N = <number-of-iterations>;
3543 llvm::Value *N = Uppers[I];
3544 // cont:
3545 // if (Counter < N) goto body; else goto exit;
3546 CGF.EmitBlock(ContDest.getBlock());
3547 auto *CVal =
3548 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3549 llvm::Value *Cmp =
3550 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3551 ? CGF.Builder.CreateICmpSLT(CVal, N)
3552 : CGF.Builder.CreateICmpULT(CVal, N);
3553 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3554 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3555 // body:
3556 CGF.EmitBlock(BodyBB);
3557 // Iteri = Begini + Counter * Stepi;
3558 CGF.EmitIgnoredExpr(HelperData.Update);
3559 }
3560 }
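// The constructor above and the destructor below bracket the user code with a
// loop nest; for a single 'iterator(i = begin:upper:step)' the emitted control
// flow is roughly:
//   counter = 0;
//   iter.cont: if (counter < N) goto iter.body; else goto iter.exit;
//   iter.body: i = begin + counter * step; <user code>
//              counter = counter + 1; goto iter.cont;  // emitted in the destructor
//   iter.exit: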
3561 ~OMPIteratorGeneratorScope() {
3562 if (!E)
3563 return;
3564 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3565 // Counter = Counter + 1;
3566 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3567 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3568 // goto cont;
3569 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3570 // exit:
3571 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3572 }
3573 }
3574};
3575} // namespace
3576
3577static std::pair<llvm::Value *, llvm::Value *>
3578getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3579 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3580 llvm::Value *Addr;
3581 if (OASE) {
3582 const Expr *Base = OASE->getBase();
3583 Addr = CGF.EmitScalarExpr(Base);
3584 } else {
3585 Addr = CGF.EmitLValue(E).getPointer(CGF);
3586 }
3587 llvm::Value *SizeVal;
3588 QualType Ty = E->getType();
3589 if (OASE) {
3590 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3591 for (const Expr *SE : OASE->getDimensions()) {
3592 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3593 Sz = CGF.EmitScalarConversion(
3594 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3595 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3596 }
3597 } else if (const auto *ASE =
3598 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3599 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3600 Address UpAddrAddress = UpAddrLVal.getAddress();
3601 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3602 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3603 /*Idx0=*/1);
3604 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3605 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3606 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3607 } else {
3608 SizeVal = CGF.getTypeSize(Ty);
3609 }
3610 return std::make_pair(Addr, SizeVal);
3611}
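// Examples: for an array section 'a[0:n]' the size is the distance from the
// section's lower address to one past its last element; for an array-shaping
// expression '([n][m])p' it is sizeof(*p) * n * m; otherwise it is simply the
// size of the expression's type.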
3612
3613/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
3614static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3615 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3616 if (KmpTaskAffinityInfoTy.isNull()) {
3617 RecordDecl *KmpAffinityInfoRD =
3618 C.buildImplicitRecord("kmp_task_affinity_info_t");
3619 KmpAffinityInfoRD->startDefinition();
3620 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3621 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3622 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3623 KmpAffinityInfoRD->completeDefinition();
3624 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3625 }
3626}
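// The record built above has the shape
//   struct { intptr_t base_addr; size_t len; uint32_t flags; };
// and is expected to match the runtime's kmp_task_affinity_info_t in kmp.h.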
3627
3628CGOpenMPRuntime::TaskResultTy
3629CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3630 const OMPExecutableDirective &D,
3631 llvm::Function *TaskFunction, QualType SharedsTy,
3632 Address Shareds, const OMPTaskDataTy &Data) {
3633 ASTContext &C = CGM.getContext();
3634 llvm::SmallVector<PrivateDataTy, 4> Privates;
3635 // Aggregate privates and sort them by descending alignment.
3636 const auto *I = Data.PrivateCopies.begin();
3637 for (const Expr *E : Data.PrivateVars) {
3638 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3639 Privates.emplace_back(
3640 C.getDeclAlign(VD),
3641 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3642 /*PrivateElemInit=*/nullptr));
3643 ++I;
3644 }
3645 I = Data.FirstprivateCopies.begin();
3646 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3647 for (const Expr *E : Data.FirstprivateVars) {
3648 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3649 Privates.emplace_back(
3650 C.getDeclAlign(VD),
3651 PrivateHelpersTy(
3652 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3653 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3654 ++I;
3655 ++IElemInitRef;
3656 }
3657 I = Data.LastprivateCopies.begin();
3658 for (const Expr *E : Data.LastprivateVars) {
3659 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3660 Privates.emplace_back(
3661 C.getDeclAlign(VD),
3662 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3663 /*PrivateElemInit=*/nullptr));
3664 ++I;
3665 }
3666 for (const VarDecl *VD : Data.PrivateLocals) {
3667 if (isAllocatableDecl(VD))
3668 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3669 else
3670 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3671 }
3672 llvm::stable_sort(Privates,
3673 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3674 return L.first > R.first;
3675 });
3676 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3677 // Build type kmp_routine_entry_t (if not built yet).
3678 emitKmpRoutineEntryT(KmpInt32Ty);
3679 // Build type kmp_task_t (if not built yet).
3680 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3681 if (SavedKmpTaskloopTQTy.isNull()) {
3682 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3683 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3684 }
3685 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3686 } else {
3687 assert((D.getDirectiveKind() == OMPD_task ||
3688 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3689 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3690 "Expected taskloop, task or target directive");
3691 if (SavedKmpTaskTQTy.isNull()) {
3692 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3693 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3694 }
3695 KmpTaskTQTy = SavedKmpTaskTQTy;
3696 }
3697 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3698 // Build particular struct kmp_task_t for the given task.
3699 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3700 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3701 CanQualType KmpTaskTWithPrivatesQTy =
3702 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3703 QualType KmpTaskTWithPrivatesPtrQTy =
3704 C.getPointerType(KmpTaskTWithPrivatesQTy);
3705 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3706 llvm::Value *KmpTaskTWithPrivatesTySize =
3707 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3708 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3709
3710 // Emit initial values for private copies (if any).
3711 llvm::Value *TaskPrivatesMap = nullptr;
3712 llvm::Type *TaskPrivatesMapTy =
3713 std::next(TaskFunction->arg_begin(), 3)->getType();
3714 if (!Privates.empty()) {
3715 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3716 TaskPrivatesMap =
3717 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3718 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3719 TaskPrivatesMap, TaskPrivatesMapTy);
3720 } else {
3721 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3722 cast<llvm::PointerType>(TaskPrivatesMapTy));
3723 }
3724 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3725 // kmp_task_t *tt);
3726 llvm::Function *TaskEntry = emitProxyTaskFunction(
3727 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3728 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3729 TaskPrivatesMap);
3730
3731 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3732 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3733 // kmp_routine_entry_t *task_entry);
3734 // Task flags. Format is taken from
3735 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3736 // description of kmp_tasking_flags struct.
3737 enum {
3738 TiedFlag = 0x1,
3739 FinalFlag = 0x2,
3740 DestructorsFlag = 0x8,
3741 PriorityFlag = 0x20,
3742 DetachableFlag = 0x40,
3743 FreeAgentFlag = 0x80,
3744 };
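// For example, a tied task with a priority clause yields
// Flags == TiedFlag | PriorityFlag == 0x21 before the 'final' bit is OR'ed in
// below.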
3745 unsigned Flags = Data.Tied ? TiedFlag : 0;
3746 bool NeedsCleanup = false;
3747 if (!Privates.empty()) {
3748 NeedsCleanup =
3749 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3750 if (NeedsCleanup)
3751 Flags = Flags | DestructorsFlag;
3752 }
3753 if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
3754 OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
3755 if (Kind == OMPC_THREADSET_omp_pool)
3756 Flags = Flags | FreeAgentFlag;
3757 }
3758 if (Data.Priority.getInt())
3759 Flags = Flags | PriorityFlag;
3760 if (D.hasClausesOfKind<OMPDetachClause>())
3761 Flags = Flags | DetachableFlag;
3762 llvm::Value *TaskFlags =
3763 Data.Final.getPointer()
3764 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3765 CGF.Builder.getInt32(FinalFlag),
3766 CGF.Builder.getInt32(/*C=*/0))
3767 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3768 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3769 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3770 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3771 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3772 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3773 TaskEntry, KmpRoutineEntryPtrTy)};
3774 llvm::Value *NewTask;
3775 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3776 // Check if we have any device clause associated with the directive.
3777 const Expr *Device = nullptr;
3778 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3779 Device = C->getDevice();
3780 // Emit the device ID if present; otherwise use the default value.
3781 llvm::Value *DeviceID;
3782 if (Device)
3783 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3784 CGF.Int64Ty, /*isSigned=*/true);
3785 else
3786 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3787 AllocArgs.push_back(DeviceID);
3788 NewTask = CGF.EmitRuntimeCall(
3789 OMPBuilder.getOrCreateRuntimeFunction(
3790 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3791 AllocArgs);
3792 } else {
3793 NewTask =
3794 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3795 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3796 AllocArgs);
3797 }
3798 // Emit detach clause initialization.
3799 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3800 // task_descriptor);
3801 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3802 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3803 LValue EvtLVal = CGF.EmitLValue(Evt);
3804
3805 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3806 // int gtid, kmp_task_t *task);
3807 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3808 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3809 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3810 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3811 OMPBuilder.getOrCreateRuntimeFunction(
3812 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3813 {Loc, Tid, NewTask});
3814 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3815 Evt->getExprLoc());
3816 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3817 }
3818 // Process affinity clauses.
3819 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3820 // Process list of affinity data.
3821 ASTContext &C = CGM.getContext();
3822 Address AffinitiesArray = Address::invalid();
3823 // Calculate number of elements to form the array of affinity data.
3824 llvm::Value *NumOfElements = nullptr;
3825 unsigned NumAffinities = 0;
3826 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3827 if (const Expr *Modifier = C->getModifier()) {
3828 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3829 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3830 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3831 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3832 NumOfElements =
3833 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3834 }
3835 } else {
3836 NumAffinities += C->varlist_size();
3837 }
3838 }
3839 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3840 // Field ids in the kmp_task_affinity_info record.
3841 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3842
3843 QualType KmpTaskAffinityInfoArrayTy;
3844 if (NumOfElements) {
3845 NumOfElements = CGF.Builder.CreateNUWAdd(
3846 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3847 auto *OVE = new (C) OpaqueValueExpr(
3848 Loc,
3849 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3850 VK_PRValue);
3851 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3852 RValue::get(NumOfElements));
3853 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3854 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3855 /*IndexTypeQuals=*/0);
3856 // Properly emit variable-sized array.
3857 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3858 ImplicitParamKind::Other);
3859 CGF.EmitVarDecl(*PD);
3860 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3861 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3862 /*isSigned=*/false);
3863 } else {
3864 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3865 KmpTaskAffinityInfoTy,
3866 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3867 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3868 AffinitiesArray =
3869 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3870 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3871 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3872 /*isSigned=*/false);
3873 }
3874
3875 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3876 // Fill the array with elements that have no iterator modifier.
3877 unsigned Pos = 0;
3878 bool HasIterator = false;
3879 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3880 if (C->getModifier()) {
3881 HasIterator = true;
3882 continue;
3883 }
3884 for (const Expr *E : C->varlist()) {
3885 llvm::Value *Addr;
3886 llvm::Value *Size;
3887 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3888 LValue Base =
3889 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3890 KmpTaskAffinityInfoTy);
3891 // affs[i].base_addr = &<Affinities[i].second>;
3892 LValue BaseAddrLVal = CGF.EmitLValueForField(
3893 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3894 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3895 BaseAddrLVal);
3896 // affs[i].len = sizeof(<Affinities[i].second>);
3897 LValue LenLVal = CGF.EmitLValueForField(
3898 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3899 CGF.EmitStoreOfScalar(Size, LenLVal);
3900 ++Pos;
3901 }
3902 }
3903 LValue PosLVal;
3904 if (HasIterator) {
3905 PosLVal = CGF.MakeAddrLValue(
3906 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3907 C.getSizeType());
3908 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3909 }
3910 // Process elements with iterators.
3911 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3912 const Expr *Modifier = C->getModifier();
3913 if (!Modifier)
3914 continue;
3915 OMPIteratorGeneratorScope IteratorScope(
3916 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3917 for (const Expr *E : C->varlist()) {
3918 llvm::Value *Addr;
3919 llvm::Value *Size;
3920 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3921 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3922 LValue Base =
3923 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3924 KmpTaskAffinityInfoTy);
3925 // affs[i].base_addr = &<Affinities[i].second>;
3926 LValue BaseAddrLVal = CGF.EmitLValueForField(
3927 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3928 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3929 BaseAddrLVal);
3930 // affs[i].len = sizeof(<Affinities[i].second>);
3931 LValue LenLVal = CGF.EmitLValueForField(
3932 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3933 CGF.EmitStoreOfScalar(Size, LenLVal);
3934 Idx = CGF.Builder.CreateNUWAdd(
3935 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3936 CGF.EmitStoreOfScalar(Idx, PosLVal);
3937 }
3938 }
3939 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3940 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3941 // naffins, kmp_task_affinity_info_t *affin_list);
3942 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3943 llvm::Value *GTid = getThreadID(CGF, Loc);
3944 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3945 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3946 // FIXME: Emit the function and ignore its result for now unless the
3947 // runtime function is properly implemented.
3948 (void)CGF.EmitRuntimeCall(
3949 OMPBuilder.getOrCreateRuntimeFunction(
3950 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3951 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3952 }
3953 llvm::Value *NewTaskNewTaskTTy =
3954 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3955 NewTask, KmpTaskTWithPrivatesPtrTy);
3956 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3957 KmpTaskTWithPrivatesQTy);
3958 LValue TDBase =
3959 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3960 // Fill the data in the resulting kmp_task_t record.
3961 // Copy shareds if there are any.
3962 Address KmpTaskSharedsPtr = Address::invalid();
3963 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
3964 KmpTaskSharedsPtr = Address(
3965 CGF.EmitLoadOfScalar(
3966 CGF.EmitLValueForField(
3967 TDBase,
3968 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3969 Loc),
3970 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3971 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3972 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3973 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3974 }
3975 // Emit initial values for private copies (if any).
3976 TaskResultTy Result;
3977 if (!Privates.empty()) {
3978 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3979 SharedsTy, SharedsPtrTy, Data, Privates,
3980 /*ForDup=*/false);
3981 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3982 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3983 Result.TaskDupFn = emitTaskDupFunction(
3984 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3985 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3986 /*WithLastIter=*/!Data.LastprivateVars.empty());
3987 }
3988 }
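// A task_dup callback is emitted only for taskloops whose privates need
// per-chunk re-initialization: when there are lastprivates or firstprivates
// with non-trivial construction.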
3989 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3990 enum { Priority = 0, Destructors = 1 };
3991 // Provide pointer to function with destructors for privates.
3992 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3993 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
3994 assert(KmpCmplrdataUD->isUnion());
3995 if (NeedsCleanup) {
3996 llvm::Value *DestructorFn = emitDestructorsFunction(
3997 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3998 KmpTaskTWithPrivatesQTy);
3999 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4000 LValue DestructorsLV = CGF.EmitLValueForField(
4001 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4002 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4003 DestructorFn, KmpRoutineEntryPtrTy),
4004 DestructorsLV);
4005 }
4006 // Set priority.
4007 if (Data.Priority.getInt()) {
4008 LValue Data2LV = CGF.EmitLValueForField(
4009 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4010 LValue PriorityLV = CGF.EmitLValueForField(
4011 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4012 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4013 }
4014 Result.NewTask = NewTask;
4015 Result.TaskEntry = TaskEntry;
4016 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4017 Result.TDBase = TDBase;
4018 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4019 return Result;
4020}
4021
4022/// Translates internal dependency kind into the runtime kind.
4023static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4024 RTLDependenceKindTy DepKind;
4025 switch (K) {
4026 case OMPC_DEPEND_in:
4027 DepKind = RTLDependenceKindTy::DepIn;
4028 break;
4029 // Out and InOut dependencies must use the same code.
4030 case OMPC_DEPEND_out:
4031 case OMPC_DEPEND_inout:
4032 DepKind = RTLDependenceKindTy::DepInOut;
4033 break;
4034 case OMPC_DEPEND_mutexinoutset:
4035 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4036 break;
4037 case OMPC_DEPEND_inoutset:
4038 DepKind = RTLDependenceKindTy::DepInOutSet;
4039 break;
4040 case OMPC_DEPEND_outallmemory:
4041 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4042 break;
4043 case OMPC_DEPEND_source:
4044 case OMPC_DEPEND_sink:
4045 case OMPC_DEPEND_depobj:
4046 case OMPC_DEPEND_inoutallmemory:
4047 case OMPC_DEPEND_unknown:
4048 llvm_unreachable("Unknown task dependence type");
4049 }
4050 return DepKind;
4051}
4052
4053/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4054static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4055 QualType &FlagsTy) {
4056 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4057 if (KmpDependInfoTy.isNull()) {
4058 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4059 KmpDependInfoRD->startDefinition();
4060 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4061 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4062 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4063 KmpDependInfoRD->completeDefinition();
4064 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4065 }
4066}
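// The record built above has the shape
//   struct { intptr_t base_addr; size_t len; uint8_t flags; };  // flags sized like bool
// and is expected to match the runtime's kmp_depend_info in kmp.h on typical
// targets.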
4067
4068std::pair<llvm::Value *, LValue>
4069CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4070 SourceLocation Loc) {
4071 ASTContext &C = CGM.getContext();
4072 QualType FlagsTy;
4073 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4074 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4075 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4076 LValue Base = CGF.EmitLoadOfPointerLValue(
4077 DepobjLVal.getAddress().withElementType(
4078 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4079 KmpDependInfoPtrTy->castAs<PointerType>());
4080 Address DepObjAddr = CGF.Builder.CreateGEP(
4081 CGF, Base.getAddress(),
4082 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4083 LValue NumDepsBase = CGF.MakeAddrLValue(
4084 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4085 // NumDeps = deps[i].base_addr;
4086 LValue BaseAddrLVal = CGF.EmitLValueForField(
4087 NumDepsBase,
4088 *std::next(KmpDependInfoRD->field_begin(),
4089 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4090 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4091 return std::make_pair(NumDeps, Base);
4092}
4093
4094static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4095 llvm::PointerUnion<unsigned *, LValue *> Pos,
4096 const OMPTaskDataTy::DependData &Data,
4097 Address DependenciesArray) {
4098 CodeGenModule &CGM = CGF.CGM;
4099 ASTContext &C = CGM.getContext();
4100 QualType FlagsTy;
4101 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4102 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4103 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4104
4105 OMPIteratorGeneratorScope IteratorScope(
4106 CGF, cast_or_null<OMPIteratorExpr>(
4107 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4108 : nullptr));
4109 for (const Expr *E : Data.DepExprs) {
4110 llvm::Value *Addr;
4111 llvm::Value *Size;
4112
4113 // The expression will be a nullptr in the 'omp_all_memory' case.
4114 if (E) {
4115 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4116 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4117 } else {
4118 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4119 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4120 }
4121 LValue Base;
4122 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4123 Base = CGF.MakeAddrLValue(
4124 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4125 } else {
4126 assert(E && "Expected a non-null expression");
4127 LValue &PosLVal = *cast<LValue *>(Pos);
4128 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4129 Base = CGF.MakeAddrLValue(
4130 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4131 }
4132 // deps[i].base_addr = &<Dependencies[i].second>;
4133 LValue BaseAddrLVal = CGF.EmitLValueForField(
4134 Base,
4135 *std::next(KmpDependInfoRD->field_begin(),
4136 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4137 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4138 // deps[i].len = sizeof(<Dependencies[i].second>);
4139 LValue LenLVal = CGF.EmitLValueForField(
4140 Base, *std::next(KmpDependInfoRD->field_begin(),
4141 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4142 CGF.EmitStoreOfScalar(Size, LenLVal);
4143 // deps[i].flags = <Dependencies[i].first>;
4144 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4145 LValue FlagsLVal = CGF.EmitLValueForField(
4146 Base,
4147 *std::next(KmpDependInfoRD->field_begin(),
4148 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4149 CGF.EmitStoreOfScalar(
4150 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4151 FlagsLVal);
4152 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4153 ++(*P);
4154 } else {
4155 LValue &PosLVal = *cast<LValue *>(Pos);
4156 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4157 Idx = CGF.Builder.CreateNUWAdd(Idx,
4158 llvm::ConstantInt::get(Idx->getType(), 1));
4159 CGF.EmitStoreOfScalar(Idx, PosLVal);
4160 }
4161 }
4162}
4163
4164SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4165 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4166 const OMPTaskDataTy::DependData &Data) {
4167 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4168 "Expected depobj dependency kind.");
4169 SmallVector<llvm::Value *, 4> Sizes;
4170 SmallVector<LValue, 4> SizeLVals;
4171 ASTContext &C = CGF.getContext();
4172 {
4173 OMPIteratorGeneratorScope IteratorScope(
4174 CGF, cast_or_null<OMPIteratorExpr>(
4175 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4176 : nullptr));
4177 for (const Expr *E : Data.DepExprs) {
4178 llvm::Value *NumDeps;
4179 LValue Base;
4180 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4181 std::tie(NumDeps, Base) =
4182 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4183 LValue NumLVal = CGF.MakeAddrLValue(
4184 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4185 C.getUIntPtrType());
4186 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4187 NumLVal.getAddress());
4188 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4189 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4190 CGF.EmitStoreOfScalar(Add, NumLVal);
4191 SizeLVals.push_back(NumLVal);
4192 }
4193 }
4194 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4195 llvm::Value *Size =
4196 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4197 Sizes.push_back(Size);
4198 }
4199 return Sizes;
4200}
4201
4202void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4203 QualType &KmpDependInfoTy,
4204 LValue PosLVal,
4205 const OMPTaskDataTy::DependData &Data,
4206 Address DependenciesArray) {
4207 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4208 "Expected depobj dependency kind.");
4209 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4210 {
4211 OMPIteratorGeneratorScope IteratorScope(
4212 CGF, cast_or_null<OMPIteratorExpr>(
4213 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4214 : nullptr));
4215 for (const Expr *E : Data.DepExprs) {
4216 llvm::Value *NumDeps;
4217 LValue Base;
4218 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4219 std::tie(NumDeps, Base) =
4220 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4221
4222 // memcpy the dependency data.
4223 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4224 ElSize,
4225 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4226 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4227 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4228 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4229
4230 // Increase pos.
4231 // pos += numDeps;
4232 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4233 CGF.EmitStoreOfScalar(Add, PosLVal);
4234 }
4235 }
4236}
4237
4238std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4239 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4240 SourceLocation Loc) {
4241 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4242 return D.DepExprs.empty();
4243 }))
4244 return std::make_pair(nullptr, Address::invalid());
4245 // Process list of dependencies.
4246 ASTContext &C = CGM.getContext();
4247 Address DependenciesArray = Address::invalid();
4248 llvm::Value *NumOfElements = nullptr;
4249 unsigned NumDependencies = std::accumulate(
4250 Dependencies.begin(), Dependencies.end(), 0,
4251 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4252 return D.DepKind == OMPC_DEPEND_depobj
4253 ? V
4254 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4255 });
4256 QualType FlagsTy;
4257 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4258 bool HasDepobjDeps = false;
4259 bool HasRegularWithIterators = false;
4260 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4261 llvm::Value *NumOfRegularWithIterators =
4262 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4263 // Calculate the number of depobj dependencies and regular deps with
4264 // iterators.
4265 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4266 if (D.DepKind == OMPC_DEPEND_depobj) {
4267 SmallVector<llvm::Value *, 4> Sizes =
4268 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4269 for (llvm::Value *Size : Sizes) {
4270 NumOfDepobjElements =
4271 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4272 }
4273 HasDepobjDeps = true;
4274 continue;
4275 }
4276 // Include number of iterations, if any.
4277
4278 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4279 llvm::Value *ClauseIteratorSpace =
4280 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4281 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4282 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4283 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4284 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4285 }
4286 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4287 ClauseIteratorSpace,
4288 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4289 NumOfRegularWithIterators =
4290 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4291 HasRegularWithIterators = true;
4292 continue;
4293 }
4294 }
4295
4296 QualType KmpDependInfoArrayTy;
4297 if (HasDepobjDeps || HasRegularWithIterators) {
4298 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4299 /*isSigned=*/false);
4300 if (HasDepobjDeps) {
4301 NumOfElements =
4302 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4303 }
4304 if (HasRegularWithIterators) {
4305 NumOfElements =
4306 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4307 }
4308 auto *OVE = new (C) OpaqueValueExpr(
4309 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4310 VK_PRValue);
4311 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4312 RValue::get(NumOfElements));
4313 KmpDependInfoArrayTy =
4314 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4315 /*IndexTypeQuals=*/0);
4316 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4317 // Properly emit variable-sized array.
4318 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4319 ImplicitParamKind::Other);
4320 CGF.EmitVarDecl(*PD);
4321 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4322 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4323 /*isSigned=*/false);
4324 } else {
4325 KmpDependInfoArrayTy = C.getConstantArrayType(
4326 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4327 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4328 DependenciesArray =
4329 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4330 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4331 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4332 /*isSigned=*/false);
4333 }
4334 unsigned Pos = 0;
4335 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4336 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4337 continue;
4338 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4339 }
4340 // Copy regular dependencies with iterators.
4341 LValue PosLVal = CGF.MakeAddrLValue(
4342 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4343 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4344 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4345 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4346 continue;
4347 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4348 }
4349 // Copy final depobj arrays without iterators.
4350 if (HasDepobjDeps) {
4351 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4352 if (Dep.DepKind != OMPC_DEPEND_depobj)
4353 continue;
4354 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4355 }
4356 }
4357 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4358 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4359 return std::make_pair(NumOfElements, DependenciesArray);
4360}
4361
4362Address CGOpenMPRuntime::emitDepobjDependClause(
4363 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4364 SourceLocation Loc) {
4365 if (Dependencies.DepExprs.empty())
4366 return Address::invalid();
4367 // Process list of dependencies.
4368 ASTContext &C = CGM.getContext();
4369 Address DependenciesArray = Address::invalid();
4370 unsigned NumDependencies = Dependencies.DepExprs.size();
4371 QualType FlagsTy;
4372 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4373 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4374
4375 llvm::Value *Size;
4376 // Define type kmp_depend_info[<Dependencies.size()>];
4377 // For depobj reserve one extra element to store the number of elements.
4378 // It is required to handle depobj(x) update(in) construct.
4379 // kmp_depend_info[<Dependencies.size()>] deps;
4380 llvm::Value *NumDepsVal;
4381 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4382 if (const auto *IE =
4383 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4384 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4385 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4386 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4387 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4388 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4389 }
4390 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4391 NumDepsVal);
4392 CharUnits SizeInBytes =
4393 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4394 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4395 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4396 NumDepsVal =
4397 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4398 } else {
4399 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4400 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4401 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4402 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4403 Size = CGM.getSize(Sz.alignTo(Align));
4404 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4405 }
4406 // The dependency array for a depobj must be allocated dynamically.
4407 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4408 // Use default allocator.
4409 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4410 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4411
4412 llvm::Value *Addr =
4413 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4414 CGM.getModule(), OMPRTL___kmpc_alloc),
4415 Args, ".dep.arr.addr");
4416 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4417 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4418 Addr, CGF.Builder.getPtrTy(0));
4419 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4420 // Write number of elements in the first element of array for depobj.
4421 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4422 // deps[i].base_addr = NumDependencies;
4423 LValue BaseAddrLVal = CGF.EmitLValueForField(
4424 Base,
4425 *std::next(KmpDependInfoRD->field_begin(),
4426 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4427 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4428 llvm::PointerUnion<unsigned *, LValue *> Pos;
4429 unsigned Idx = 1;
4430 LValue PosLVal;
4431 if (Dependencies.IteratorExpr) {
4432 PosLVal = CGF.MakeAddrLValue(
4433 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4434 C.getSizeType());
4435 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4436 /*IsInit=*/true);
4437 Pos = &PosLVal;
4438 } else {
4439 Pos = &Idx;
4440 }
4441 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4442 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4443 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4444 CGF.Int8Ty);
4445 return DependenciesArray;
4446}
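// Note: the returned address points at element 1 of the allocation; element 0
// keeps the number of dependencies in its base_addr field, which
// emitUpdateClause and emitDestroyClause recover by stepping back one element.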
4447
4448void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4449 SourceLocation Loc) {
4450 ASTContext &C = CGM.getContext();
4451 QualType FlagsTy;
4452 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4453 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4454 C.VoidPtrTy.castAs<PointerType>());
4455 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4456 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4457 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4458 CGF.ConvertTypeForMem(KmpDependInfoTy));
4459 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4460 Addr.getElementType(), Addr.emitRawPointer(CGF),
4461 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4462 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4463 CGF.VoidPtrTy);
4464 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4465 // Use default allocator.
4466 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4467 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4468
4469 // _kmpc_free(gtid, addr, nullptr);
4470 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4471 CGM.getModule(), OMPRTL___kmpc_free),
4472 Args);
4473}
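// Illustrative example (assumed user code): for
//   #pragma omp depobj(o) destroy
// the depobj points at element 1 of the allocation made above, so the code
// steps back by one kmp_depend_info element to recover the base address
// before releasing it with __kmpc_free(gtid, addr, /*allocator=*/nullptr).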
4474
4475void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4476 OpenMPDependClauseKind NewDepKind,
4477 SourceLocation Loc) {
4478 ASTContext &C = CGM.getContext();
4479 QualType FlagsTy;
4480 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4481 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4482 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4483 llvm::Value *NumDeps;
4484 LValue Base;
4485 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4486
4487 Address Begin = Base.getAddress();
4488 // Compute the end pointer: one past the last kmp_depend_info element.
4489 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4490 Begin.emitRawPointer(CGF), NumDeps);
4491 // The basic structure here is a while-do loop.
4492 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4493 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4494 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4495 CGF.EmitBlock(BodyBB);
4496 llvm::PHINode *ElementPHI =
4497 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4498 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4499 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4500 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4501 Base.getTBAAInfo());
4502 // deps[i].flags = NewDepKind;
4503 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4504 LValue FlagsLVal = CGF.EmitLValueForField(
4505 Base, *std::next(KmpDependInfoRD->field_begin(),
4506 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4507 CGF.EmitStoreOfScalar(
4508 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4509 FlagsLVal);
4510
4511 // Shift the address forward by one element.
4512 llvm::Value *ElementNext =
4513 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4514 .emitRawPointer(CGF);
4515 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4516 llvm::Value *IsEmpty =
4517 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4518 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4519 // Done.
4520 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4521}
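// Illustrative example (assumed user code): for
//   #pragma omp depobj(o) update(inout)
// the loop above visits the NumDeps entries recorded in the depobj and
// overwrites each deps[i].flags with the value translateDependencyKind()
// returns for the new dependency kind; the stored element count is untouched.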
4522
4523void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4524 const OMPExecutableDirective &D,
4525 llvm::Function *TaskFunction,
4526 QualType SharedsTy, Address Shareds,
4527 const Expr *IfCond,
4528 const OMPTaskDataTy &Data) {
4529 if (!CGF.HaveInsertPoint())
4530 return;
4531
4532 TaskResultTy Result =
4533 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4534 llvm::Value *NewTask = Result.NewTask;
4535 llvm::Function *TaskEntry = Result.TaskEntry;
4536 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4537 LValue TDBase = Result.TDBase;
4538 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4539 // Process list of dependences.
4540 Address DependenciesArray = Address::invalid();
4541 llvm::Value *NumOfElements;
4542 std::tie(NumOfElements, DependenciesArray) =
4543 emitDependClause(CGF, Data.Dependences, Loc);
4544
4545 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4546 // libcall.
4547 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4548 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4549 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4550 // list is not empty
4551 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4552 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4553 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4554 llvm::Value *DepTaskArgs[7];
4555 if (!Data.Dependences.empty()) {
4556 DepTaskArgs[0] = UpLoc;
4557 DepTaskArgs[1] = ThreadID;
4558 DepTaskArgs[2] = NewTask;
4559 DepTaskArgs[3] = NumOfElements;
4560 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4561 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4562 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4563 }
4564 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4565 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4566 if (!Data.Tied) {
4567 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4568 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4569 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4570 }
4571 if (!Data.Dependences.empty()) {
4572 CGF.EmitRuntimeCall(
4573 OMPBuilder.getOrCreateRuntimeFunction(
4574 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4575 DepTaskArgs);
4576 } else {
4577 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4578 CGM.getModule(), OMPRTL___kmpc_omp_task),
4579 TaskArgs);
4580 }
4581 // Check if the parent region is untied and build the return for the untied task.
4582 if (auto *Region =
4583 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4584 Region->emitUntiedSwitch(CGF);
4585 };
4586
4587 llvm::Value *DepWaitTaskArgs[7];
4588 if (!Data.Dependences.empty()) {
4589 DepWaitTaskArgs[0] = UpLoc;
4590 DepWaitTaskArgs[1] = ThreadID;
4591 DepWaitTaskArgs[2] = NumOfElements;
4592 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4593 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4594 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4595 DepWaitTaskArgs[6] =
4596 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4597 }
4598 auto &M = CGM.getModule();
4599 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4600 TaskEntry, &Data, &DepWaitTaskArgs,
4601 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4602 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4603 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
4604 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4605 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
4606 // kmp_int32 has_no_wait); if dependence info is specified.
4607 if (!Data.Dependences.empty())
4608 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4609 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4610 DepWaitTaskArgs);
4611 // Call proxy_task_entry(gtid, new_task);
4612 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4613 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4614 Action.Enter(CGF);
4615 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4616 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4617 OutlinedFnArgs);
4618 };
4619
4620 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4621 // kmp_task_t *new_task);
4622 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4623 // kmp_task_t *new_task);
4624 RegionCodeGenTy RCG(CodeGen);
4625 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4626 M, OMPRTL___kmpc_omp_task_begin_if0),
4627 TaskArgs,
4628 OMPBuilder.getOrCreateRuntimeFunction(
4629 M, OMPRTL___kmpc_omp_task_complete_if0),
4630 TaskArgs);
4631 RCG.setAction(Action);
4632 RCG(CGF);
4633 };
4634
4635 if (IfCond) {
4636 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4637 } else {
4638 RegionCodeGenTy ThenRCG(ThenCodeGen);
4639 ThenRCG(CGF);
4640 }
4641}
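// Illustrative example (assumed user code): for
//   #pragma omp task if(cond) depend(out : x)
//   { ... }
// ThenCodeGen enqueues the task via __kmpc_omp_task_with_deps (or plain
// __kmpc_omp_task without dependences), while ElseCodeGen runs the task
// immediately when cond is false: __kmpc_omp_taskwait_deps_51 to wait for
// the dependences, then __kmpc_omp_task_begin_if0, a direct call to the
// task entry, and __kmpc_omp_task_complete_if0.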
4642
4643void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4644 const OMPLoopDirective &D,
4645 llvm::Function *TaskFunction,
4646 QualType SharedsTy, Address Shareds,
4647 const Expr *IfCond,
4648 const OMPTaskDataTy &Data) {
4649 if (!CGF.HaveInsertPoint())
4650 return;
4651 TaskResultTy Result =
4652 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4653 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4654 // libcall.
4655 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4656 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4657 // sched, kmp_uint64 grainsize, void *task_dup);
4658 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4659 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4660 llvm::Value *IfVal;
4661 if (IfCond) {
4662 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4663 /*isSigned=*/true);
4664 } else {
4665 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4666 }
4667
4668 LValue LBLVal = CGF.EmitLValueForField(
4669 Result.TDBase,
4670 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4671 const auto *LBVar =
4672 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4673 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4674 /*IsInitializer=*/true);
4675 LValue UBLVal = CGF.EmitLValueForField(
4676 Result.TDBase,
4677 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4678 const auto *UBVar =
4679 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4680 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4681 /*IsInitializer=*/true);
4682 LValue StLVal = CGF.EmitLValueForField(
4683 Result.TDBase,
4684 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4685 const auto *StVar =
4686 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4687 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4688 /*IsInitializer=*/true);
4689 // Store reductions address.
4690 LValue RedLVal = CGF.EmitLValueForField(
4691 Result.TDBase,
4692 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4693 if (Data.Reductions) {
4694 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4695 } else {
4696 CGF.EmitNullInitialization(RedLVal.getAddress(),
4697 CGF.getContext().VoidPtrTy);
4698 }
4699 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4700 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4701 UpLoc,
4702 ThreadID,
4703 Result.NewTask,
4704 IfVal,
4705 LBLVal.getPointer(CGF),
4706 UBLVal.getPointer(CGF),
4707 CGF.EmitLoadOfScalar(StLVal, Loc),
4708 llvm::ConstantInt::getSigned(
4709 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4710 llvm::ConstantInt::getSigned(
4711 CGF.IntTy, Data.Schedule.getPointer()
4712 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4713 : NoSchedule),
4714 Data.Schedule.getPointer()
4715 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4716 /*isSigned=*/false)
4717 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4718 if (Data.HasModifier)
4719 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4720
4721 TaskArgs.push_back(Result.TaskDupFn
4722 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4723 Result.TaskDupFn, CGF.VoidPtrTy)
4724 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4725 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4726 CGM.getModule(), Data.HasModifier
4727 ? OMPRTL___kmpc_taskloop_5
4728 : OMPRTL___kmpc_taskloop),
4729 TaskArgs);
4730}
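// Illustrative example (assumed user code): for
//   #pragma omp taskloop grainsize(4)
//   for (int i = 0; i < n; ++i) { ... }
// Data.Schedule carries the clause, so sched becomes Grainsize (1) and the
// value of the grainsize expression (here 4) is passed as the kmp_uint64
// argument of __kmpc_taskloop; num_tasks(t) selects NumTasks (2) instead,
// and with neither clause sched is NoSchedule (0) with a zero argument.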
4731
4732/// Emit a reduction operation for each element of an array (required for
4733/// array sections): LHS op= RHS.
4734/// \param Type Type of array.
4735/// \param LHSVar Variable on the left side of the reduction operation
4736/// (references an element of the array in the original variable).
4737/// \param RHSVar Variable on the right side of the reduction operation
4738/// (references an element of the array in the original variable).
4739/// \param RedOpGen Generator of the reduction operation that uses LHSVar and
4740/// RHSVar.
4741static void EmitOMPAggregateReduction(
4742 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4743 const VarDecl *RHSVar,
4744 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4745 const Expr *, const Expr *)> &RedOpGen,
4746 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4747 const Expr *UpExpr = nullptr) {
4748 // Perform element-by-element initialization.
4749 QualType ElementTy;
4750 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4751 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4752
4753 // Drill down to the base element type on both arrays.
4754 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4755 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4756
4757 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4758 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4759 // Compute the end pointer: one past the last array element.
4760 llvm::Value *LHSEnd =
4761 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4762 // The basic structure here is a while-do loop.
4763 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4764 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4765 llvm::Value *IsEmpty =
4766 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4767 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4768
4769 // Enter the loop body, making that address the current address.
4770 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4771 CGF.EmitBlock(BodyBB);
4772
4773 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4774
4775 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4776 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4777 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4778 Address RHSElementCurrent(
4779 RHSElementPHI, RHSAddr.getElementType(),
4780 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4781
4782 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4783 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4784 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4785 Address LHSElementCurrent(
4786 LHSElementPHI, LHSAddr.getElementType(),
4787 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4788
4789 // Emit copy.
4790 CodeGenFunction::OMPPrivateScope Scope(CGF);
4791 Scope.addPrivate(LHSVar, LHSElementCurrent);
4792 Scope.addPrivate(RHSVar, RHSElementCurrent);
4793 Scope.Privatize();
4794 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4795 Scope.ForceCleanup();
4796
4797 // Shift the address forward by one element.
4798 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4799 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4800 "omp.arraycpy.dest.element");
4801 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4802 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4803 "omp.arraycpy.src.element");
4804 // Check whether we've reached the end.
4805 llvm::Value *Done =
4806 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4807 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4808 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4809 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4810
4811 // Done.
4812 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4813}
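// Illustrative example (assumed user code): for an array-section reduction
//   #pragma omp parallel for reduction(+ : a[0:n])
// the combiner only works on single elements, so the loop above walks
// paired LHS/RHS element pointers across all n elements and invokes
// RedOpGen (here the '+' combiner) once per element.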
4814
4815/// Emit the reduction combiner. If the combiner is a simple expression, emit
4816/// it as is; otherwise treat it as the combiner of a UDR decl and emit it as
4817/// a call to the UDR combiner function.
4818static void emitReductionCombiner(CodeGenFunction &CGF,
4819 const Expr *ReductionOp) {
4820 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4821 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4822 if (const auto *DRE =
4823 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4824 if (const auto *DRD =
4825 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4826 std::pair<llvm::Function *, llvm::Function *> Reduction =
4827 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4828 RValue Func = RValue::get(Reduction.first);
4829 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4830 CGF.EmitIgnoredExpr(ReductionOp);
4831 return;
4832 }
4833 CGF.EmitIgnoredExpr(ReductionOp);
4834}
4835
4836llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4837 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4838 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4839 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4840 ASTContext &C = CGM.getContext();
4841
4842 // void reduction_func(void *LHSArg, void *RHSArg);
4843 FunctionArgList Args;
4844 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4845 ImplicitParamKind::Other);
4846 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4847 ImplicitParamKind::Other);
4848 Args.push_back(&LHSArg);
4849 Args.push_back(&RHSArg);
4850 const auto &CGFI =
4851 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4852 std::string Name = getReductionFuncName(ReducerName);
4853 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4854 llvm::GlobalValue::InternalLinkage, Name,
4855 &CGM.getModule());
4856 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4857 Fn->setDoesNotRecurse();
4858 CodeGenFunction CGF(CGM);
4859 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4860
4861 // Dst = (void*[n])(LHSArg);
4862 // Src = (void*[n])(RHSArg);
4863 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4864 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4865 CGF.Builder.getPtrTy(0)),
4866 ArgsElemType, CGF.getPointerAlign());
4867 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4868 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4869 CGF.Builder.getPtrTy(0)),
4870 ArgsElemType, CGF.getPointerAlign());
4871
4872 // ...
4873 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4874 // ...
4875 CodeGenFunction::OMPPrivateScope Scope(CGF);
4876 const auto *IPriv = Privates.begin();
4877 unsigned Idx = 0;
4878 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4879 const auto *RHSVar =
4880 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4881 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4882 const auto *LHSVar =
4883 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4884 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4885 QualType PrivTy = (*IPriv)->getType();
4886 if (PrivTy->isVariablyModifiedType()) {
4887 // Get array size and emit VLA type.
4888 ++Idx;
4889 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4890 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4891 const VariableArrayType *VLA =
4892 CGF.getContext().getAsVariableArrayType(PrivTy);
4893 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4894 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4895 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4896 CGF.EmitVariablyModifiedType(PrivTy);
4897 }
4898 }
4899 Scope.Privatize();
4900 IPriv = Privates.begin();
4901 const auto *ILHS = LHSExprs.begin();
4902 const auto *IRHS = RHSExprs.begin();
4903 for (const Expr *E : ReductionOps) {
4904 if ((*IPriv)->getType()->isArrayType()) {
4905 // Emit reduction for array section.
4906 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4907 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4908 EmitOMPAggregateReduction(
4909 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4910 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4911 emitReductionCombiner(CGF, E);
4912 });
4913 } else {
4914 // Emit reduction for array subscript or single variable.
4915 emitReductionCombiner(CGF, E);
4916 }
4917 ++IPriv;
4918 ++ILHS;
4919 ++IRHS;
4920 }
4921 Scope.ForceCleanup();
4922 CGF.FinishFunction();
4923 return Fn;
4924}
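// Illustrative sketch (assumed names and types, not emitted verbatim): for
//   #pragma omp parallel reduction(+ : x) reduction(* : y)
// with int x and double y, the generated reducer behaves like
//   void red_func(void *lhs[2], void *rhs[2]) {
//     *(int *)lhs[0] = *(int *)lhs[0] + *(int *)rhs[0];
//     *(double *)lhs[1] = *(double *)lhs[1] * *(double *)rhs[1];
//   }
// with an extra trailing slot per variably modified item holding its size.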
4925
4926void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4927 const Expr *ReductionOp,
4928 const Expr *PrivateRef,
4929 const DeclRefExpr *LHS,
4930 const DeclRefExpr *RHS) {
4931 if (PrivateRef->getType()->isArrayType()) {
4932 // Emit reduction for array section.
4933 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4934 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4935 EmitOMPAggregateReduction(
4936 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4937 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4938 emitReductionCombiner(CGF, ReductionOp);
4939 });
4940 } else {
4941 // Emit reduction for array subscript or single variable.
4942 emitReductionCombiner(CGF, ReductionOp);
4943 }
4944}
4945
4946static std::string generateUniqueName(CodeGenModule &CGM,
4947 llvm::StringRef Prefix, const Expr *Ref);
4948
4949void CGOpenMPRuntime::emitPrivateReduction(
4950 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4951 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4952
4953 // Create a shared global variable (__shared_reduction_var) to accumulate the
4954 // final result.
4955 //
4956 // Call __kmpc_barrier to synchronize threads before initialization.
4957 //
4958 // The master thread (thread_id == 0) initializes __shared_reduction_var
4959 // with the identity value or initializer.
4960 //
4961 // Call __kmpc_barrier to synchronize before combining.
4962 // For each i:
4963 // - Thread enters critical section.
4964 // - Reads its private value from LHSExprs[i].
4965 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4966 // Privates[i]).
4967 // - Exits critical section.
4968 //
4969 // Call __kmpc_barrier after combining.
4970 //
4971 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4972 //
4973 // Final __kmpc_barrier to synchronize after broadcasting
4974 QualType PrivateType = Privates->getType();
4975 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4976
4977 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
4978 std::string ReductionVarNameStr;
4979 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
4980 ReductionVarNameStr =
4981 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4982 else
4983 ReductionVarNameStr = "unnamed_priv_var";
4984
4985 // Create an internal shared variable.
4986 std::string SharedName =
4987 CGM.getOpenMPRuntime().getName({"internal_private_", ReductionVarNameStr});
4988 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4989 LLVMType, ".omp.reduction." + SharedName);
4990
4991 SharedVar->setAlignment(
4992 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4993
4994 Address SharedResult =
4995 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
4996
4997 llvm::Value *ThreadId = getThreadID(CGF, Loc);
4998 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4999 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
5000
5001 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
5002 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
5003
5004 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
5005 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
5006 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
5007
5008 CGF.EmitBlock(InitBB);
5009
5010 auto EmitSharedInit = [&]() {
5011 if (UDR) { // Check if it's a User-Defined Reduction
5012 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5013 std::pair<llvm::Function *, llvm::Function *> FnPair =
5014 getUserDefinedReduction(UDR);
5015 llvm::Function *InitializerFn = FnPair.second;
5016 if (InitializerFn) {
5017 if (const auto *CE =
5018 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
5019 const auto *OutDRE = cast<DeclRefExpr>(
5020 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5021 ->getSubExpr());
5022 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5023
5024 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5025 LocalScope.addPrivate(OutVD, SharedResult);
5026
5027 (void)LocalScope.Privatize();
5028 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5029 CE->getCallee()->IgnoreParenImpCasts())) {
5030 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5031 CGF, OVE, RValue::get(InitializerFn));
5032 CGF.EmitIgnoredExpr(CE);
5033 } else {
5034 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5035 PrivateType.getQualifiers(),
5036 /*IsInitializer=*/true);
5037 }
5038 } else {
5039 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5040 PrivateType.getQualifiers(),
5041 /*IsInitializer=*/true);
5042 }
5043 } else {
5044 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5045 PrivateType.getQualifiers(),
5046 /*IsInitializer=*/true);
5047 }
5048 } else {
5049 // EmitNullInitialization handles default construction for C++ classes
5050 // and zeroing for scalars, which is a reasonable default.
5051 CGF.EmitNullInitialization(SharedResult, PrivateType);
5052 }
5053 return; // UDR initialization handled
5054 }
5055 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5056 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5057 if (const Expr *InitExpr = VD->getInit()) {
5058 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5059 PrivateType.getQualifiers(), true);
5060 return;
5061 }
5062 }
5063 }
5064 CGF.EmitNullInitialization(SharedResult, PrivateType);
5065 };
5066 EmitSharedInit();
5067 CGF.Builder.CreateBr(InitEndBB);
5068 CGF.EmitBlock(InitEndBB);
5069
5070 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5071 CGM.getModule(), OMPRTL___kmpc_barrier),
5072 BarrierArgs);
5073
5074 const Expr *ReductionOp = ReductionOps;
5075 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5076 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5077 LValue LHSLV = CGF.EmitLValue(Privates);
5078
5079 auto EmitCriticalReduction = [&](auto ReductionGen) {
5080 std::string CriticalName = getName({"reduction_critical"});
5081 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5082 };
5083
5084 if (CurrentUDR) {
5085 // Handle user-defined reduction.
5086 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5087 Action.Enter(CGF);
5088 std::pair<llvm::Function *, llvm::Function *> FnPair =
5089 getUserDefinedReduction(CurrentUDR);
5090 if (FnPair.first) {
5091 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5092 const auto *OutDRE = cast<DeclRefExpr>(
5093 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5094 ->getSubExpr());
5095 const auto *InDRE = cast<DeclRefExpr>(
5096 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5097 ->getSubExpr());
5098 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5099 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5100 SharedLV.getAddress());
5101 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5102 LHSLV.getAddress());
5103 (void)LocalScope.Privatize();
5104 emitReductionCombiner(CGF, ReductionOp);
5105 }
5106 }
5107 };
5108 EmitCriticalReduction(ReductionGen);
5109 } else {
5110 // Handle built-in reduction operations.
5111#ifndef NDEBUG
5112 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5113 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5114 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5115
5116 const Expr *AssignRHS = nullptr;
5117 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5118 if (BinOp->getOpcode() == BO_Assign)
5119 AssignRHS = BinOp->getRHS();
5120 } else if (const auto *OpCall =
5121 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5122 if (OpCall->getOperator() == OO_Equal)
5123 AssignRHS = OpCall->getArg(1);
5124 }
5125
5126 assert(AssignRHS &&
5127 "Private Variable Reduction : Invalid ReductionOp expression");
5128#endif
5129
5130 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5131 Action.Enter(CGF);
5132 const auto *OmpOutDRE =
5133 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5134 const auto *OmpInDRE =
5135 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5136 assert(
5137 OmpOutDRE && OmpInDRE &&
5138 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5139 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5140 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5141 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5142 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5143 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5144 (void)LocalScope.Privatize();
5145 // Emit the actual reduction operation
5146 CGF.EmitIgnoredExpr(ReductionOp);
5147 };
5148 EmitCriticalReduction(ReductionGen);
5149 }
5150
5151 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5152 CGM.getModule(), OMPRTL___kmpc_barrier),
5153 BarrierArgs);
5154
5155 // Broadcast final result
5156 bool IsAggregate = PrivateType->isAggregateType();
5157 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5158 llvm::Value *FinalResultVal = nullptr;
5159 Address FinalResultAddr = Address::invalid();
5160
5161 if (IsAggregate)
5162 FinalResultAddr = SharedResult;
5163 else
5164 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5165
5166 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5167 if (IsAggregate) {
5168 CGF.EmitAggregateCopy(TargetLHSLV,
5169 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5170 PrivateType, AggValueSlot::DoesNotOverlap, false);
5171 } else {
5172 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5173 }
5174 // Final synchronization barrier
5175 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5176 CGM.getModule(), OMPRTL___kmpc_barrier),
5177 BarrierArgs);
5178
5179 // Combiner with original list item
5180 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5181 PrePostActionTy &Action) {
5182 Action.Enter(CGF);
5183 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5184 cast<DeclRefExpr>(LHSExprs),
5185 cast<DeclRefExpr>(RHSExprs));
5186 };
5187 EmitCriticalReduction(OriginalListCombiner);
5188}
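// Illustrative walk-through (assumed user code) for a reduction over a
// private list item, e.g. a variable that is already private in the
// enclosing region: barrier; thread 0 initializes the internal shared copy;
// barrier; each thread folds its value in under the "reduction_critical"
// critical section; barrier; every thread copies the combined value back
// out; and a final critical section merges it with the original list item.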
5189
5190void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5191 ArrayRef<const Expr *> OrgPrivates,
5192 ArrayRef<const Expr *> OrgLHSExprs,
5193 ArrayRef<const Expr *> OrgRHSExprs,
5194 ArrayRef<const Expr *> OrgReductionOps,
5195 ReductionOptionsTy Options) {
5196 if (!CGF.HaveInsertPoint())
5197 return;
5198
5199 bool WithNowait = Options.WithNowait;
5200 bool SimpleReduction = Options.SimpleReduction;
5201
5202 // Next code should be emitted for reduction:
5203 //
5204 // static kmp_critical_name lock = { 0 };
5205 //
5206 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5207 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5208 // ...
5209 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5210 // *(Type<n>-1*)rhs[<n>-1]);
5211 // }
5212 //
5213 // ...
5214 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5215 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5216 // RedList, reduce_func, &<lock>)) {
5217 // case 1:
5218 // ...
5219 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5220 // ...
5221 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5222 // break;
5223 // case 2:
5224 // ...
5225 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5226 // ...
5227 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5228 // break;
5229 // default:;
5230 // }
5231 //
5232 // if SimpleReduction is true, only the next code is generated:
5233 // ...
5234 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5235 // ...
5236
5237 ASTContext &C = CGM.getContext();
5238
5239 if (SimpleReduction) {
5240 CodeGenFunction::RunCleanupsScope Scope(CGF);
5241 const auto *IPriv = OrgPrivates.begin();
5242 const auto *ILHS = OrgLHSExprs.begin();
5243 const auto *IRHS = OrgRHSExprs.begin();
5244 for (const Expr *E : OrgReductionOps) {
5245 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5246 cast<DeclRefExpr>(*IRHS));
5247 ++IPriv;
5248 ++ILHS;
5249 ++IRHS;
5250 }
5251 return;
5252 }
5253
5254 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5255 // Only keep entries where the corresponding variable is not private.
5256 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5257 FilteredRHSExprs, FilteredReductionOps;
5258 for (unsigned I : llvm::seq<unsigned>(
5259 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5260 if (!Options.IsPrivateVarReduction[I]) {
5261 FilteredPrivates.emplace_back(OrgPrivates[I]);
5262 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5263 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5264 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5265 }
5266 }
5267 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5268 // processing.
5269 ArrayRef<const Expr *> Privates = FilteredPrivates;
5270 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5271 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5272 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5273
5274 // 1. Build a list of reduction variables.
5275 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5276 auto Size = RHSExprs.size();
5277 for (const Expr *E : Privates) {
5278 if (E->getType()->isVariablyModifiedType())
5279 // Reserve place for array size.
5280 ++Size;
5281 }
5282 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5283 QualType ReductionArrayTy = C.getConstantArrayType(
5284 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5285 /*IndexTypeQuals=*/0);
5286 RawAddress ReductionList =
5287 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5288 const auto *IPriv = Privates.begin();
5289 unsigned Idx = 0;
5290 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5291 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5292 CGF.Builder.CreateStore(
5293 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5294 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5295 Elem);
5296 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5297 // Store array size.
5298 ++Idx;
5299 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5300 llvm::Value *Size = CGF.Builder.CreateIntCast(
5301 CGF.getVLASize(
5302 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5303 .NumElts,
5304 CGF.SizeTy, /*isSigned=*/false);
5305 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5306 Elem);
5307 }
5308 }
5309
5310 // 2. Emit reduce_func().
5311 llvm::Function *ReductionFn = emitReductionFunction(
5312 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5313 Privates, LHSExprs, RHSExprs, ReductionOps);
5314
5315 // 3. Create static kmp_critical_name lock = { 0 };
5316 std::string Name = getName({"reduction"});
5317 llvm::Value *Lock = getCriticalRegionLock(Name);
5318
5319 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5320 // RedList, reduce_func, &<lock>);
5321 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5322 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5323 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5324 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5325 ReductionList.getPointer(), CGF.VoidPtrTy);
5326 llvm::Value *Args[] = {
5327 IdentTLoc, // ident_t *<loc>
5328 ThreadId, // i32 <gtid>
5329 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5330 ReductionArrayTySize, // size_type sizeof(RedList)
5331 RL, // void *RedList
5332 ReductionFn, // void (*) (void *, void *) <reduce_func>
5333 Lock // kmp_critical_name *&<lock>
5334 };
5335 llvm::Value *Res = CGF.EmitRuntimeCall(
5336 OMPBuilder.getOrCreateRuntimeFunction(
5337 CGM.getModule(),
5338 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5339 Args);
5340
5341 // 5. Build switch(res)
5342 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5343 llvm::SwitchInst *SwInst =
5344 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5345
5346 // 6. Build case 1:
5347 // ...
5348 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5349 // ...
5350 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5351 // break;
5352 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5353 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5354 CGF.EmitBlock(Case1BB);
5355
5356 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5357 llvm::Value *EndArgs[] = {
5358 IdentTLoc, // ident_t *<loc>
5359 ThreadId, // i32 <gtid>
5360 Lock // kmp_critical_name *&<lock>
5361 };
5362 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5363 CodeGenFunction &CGF, PrePostActionTy &Action) {
5364 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5365 const auto *IPriv = Privates.begin();
5366 const auto *ILHS = LHSExprs.begin();
5367 const auto *IRHS = RHSExprs.begin();
5368 for (const Expr *E : ReductionOps) {
5369 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5370 cast<DeclRefExpr>(*IRHS));
5371 ++IPriv;
5372 ++ILHS;
5373 ++IRHS;
5374 }
5375 };
5376 RegionCodeGenTy RCG(CodeGen);
5377 CommonActionTy Action(
5378 nullptr, {},
5379 OMPBuilder.getOrCreateRuntimeFunction(
5380 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5381 : OMPRTL___kmpc_end_reduce),
5382 EndArgs);
5383 RCG.setAction(Action);
5384 RCG(CGF);
5385
5386 CGF.EmitBranch(DefaultBB);
5387
5388 // 7. Build case 2:
5389 // ...
5390 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5391 // ...
5392 // break;
5393 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5394 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5395 CGF.EmitBlock(Case2BB);
5396
5397 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5398 CodeGenFunction &CGF, PrePostActionTy &Action) {
5399 const auto *ILHS = LHSExprs.begin();
5400 const auto *IRHS = RHSExprs.begin();
5401 const auto *IPriv = Privates.begin();
5402 for (const Expr *E : ReductionOps) {
5403 const Expr *XExpr = nullptr;
5404 const Expr *EExpr = nullptr;
5405 const Expr *UpExpr = nullptr;
5406 BinaryOperatorKind BO = BO_Comma;
5407 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5408 if (BO->getOpcode() == BO_Assign) {
5409 XExpr = BO->getLHS();
5410 UpExpr = BO->getRHS();
5411 }
5412 }
5413 // Try to emit update expression as a simple atomic.
5414 const Expr *RHSExpr = UpExpr;
5415 if (RHSExpr) {
5416 // Analyze RHS part of the whole expression.
5417 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5418 RHSExpr->IgnoreParenImpCasts())) {
5419 // If this is a conditional operator, analyze its condition for
5420 // min/max reduction operator.
5421 RHSExpr = ACO->getCond();
5422 }
5423 if (const auto *BORHS =
5424 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5425 EExpr = BORHS->getRHS();
5426 BO = BORHS->getOpcode();
5427 }
5428 }
5429 if (XExpr) {
5430 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5431 auto &&AtomicRedGen = [BO, VD,
5432 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5433 const Expr *EExpr, const Expr *UpExpr) {
5434 LValue X = CGF.EmitLValue(XExpr);
5435 RValue E;
5436 if (EExpr)
5437 E = CGF.EmitAnyExpr(EExpr);
5438 CGF.EmitOMPAtomicSimpleUpdateExpr(
5439 X, E, BO, /*IsXLHSInRHSPart=*/true,
5440 llvm::AtomicOrdering::Monotonic, Loc,
5441 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5442 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5443 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5444 CGF.emitOMPSimpleStore(
5445 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5446 VD->getType().getNonReferenceType(), Loc);
5447 PrivateScope.addPrivate(VD, LHSTemp);
5448 (void)PrivateScope.Privatize();
5449 return CGF.EmitAnyExpr(UpExpr);
5450 });
5451 };
5452 if ((*IPriv)->getType()->isArrayType()) {
5453 // Emit atomic reduction for array section.
5454 const auto *RHSVar =
5455 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5456 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5457 AtomicRedGen, XExpr, EExpr, UpExpr);
5458 } else {
5459 // Emit atomic reduction for array subscript or single variable.
5460 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5461 }
5462 } else {
5463 // Emit as a critical region.
5464 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5465 const Expr *, const Expr *) {
5466 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5467 std::string Name = RT.getName({"atomic_reduction"});
5468 RT.emitCriticalRegion(
5469 CGF, Name,
5470 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5471 Action.Enter(CGF);
5472 emitReductionCombiner(CGF, E);
5473 },
5474 Loc);
5475 };
5476 if ((*IPriv)->getType()->isArrayType()) {
5477 const auto *LHSVar =
5478 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5479 const auto *RHSVar =
5480 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5481 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5482 CritRedGen);
5483 } else {
5484 CritRedGen(CGF, nullptr, nullptr, nullptr);
5485 }
5486 }
5487 ++ILHS;
5488 ++IRHS;
5489 ++IPriv;
5490 }
5491 };
5492 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5493 if (!WithNowait) {
5494 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5495 llvm::Value *EndArgs[] = {
5496 IdentTLoc, // ident_t *<loc>
5497 ThreadId, // i32 <gtid>
5498 Lock // kmp_critical_name *&<lock>
5499 };
5500 CommonActionTy Action(nullptr, {},
5501 OMPBuilder.getOrCreateRuntimeFunction(
5502 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5503 EndArgs);
5504 AtomicRCG.setAction(Action);
5505 AtomicRCG(CGF);
5506 } else {
5507 AtomicRCG(CGF);
5508 }
5509
5510 CGF.EmitBranch(DefaultBB);
5511 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5512 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5513 "PrivateVarReduction: Privates size mismatch");
5514 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5515 "PrivateVarReduction: ReductionOps size mismatch");
5516 for (unsigned I : llvm::seq<unsigned>(
5517 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5518 if (Options.IsPrivateVarReduction[I])
5519 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5520 OrgRHSExprs[I], OrgReductionOps[I]);
5521 }
5522}
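// Illustrative example (assumed user code): for
//   #pragma omp parallel for reduction(+ : s)
// __kmpc_reduce{_nowait} picks the strategy per thread: 1 means combine
// directly (case 1: s += s_priv, the thread's private copy, then
// __kmpc_end_reduce{_nowait}), 2 means combine atomically (case 2), and
// anything else means this thread has nothing to combine (default case).
// Entries flagged IsPrivateVarReduction are filtered out beforehand and
// handled by emitPrivateReduction() at the end.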
5523
5524/// Generates unique name for artificial threadprivate variables.
5525/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5526static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5527 const Expr *Ref) {
5528 SmallString<256> Buffer;
5529 llvm::raw_svector_ostream Out(Buffer);
5530 const clang::DeclRefExpr *DE;
5531 const VarDecl *D = ::getBaseDecl(Ref, DE);
5532 if (!D)
5533 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5534 D = D->getCanonicalDecl();
5535 std::string Name = CGM.getOpenMPRuntime().getName(
5536 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5537 Out << Prefix << Name << "_"
5538 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5539 return std::string(Out.str());
5540}
5541
5542/// Emits reduction initializer function:
5543/// \code
5544/// void @.red_init(void* %arg, void* %orig) {
5545/// %0 = bitcast void* %arg to <type>*
5546/// store <type> <init>, <type>* %0
5547/// ret void
5548/// }
5549/// \endcode
5550static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5551 SourceLocation Loc,
5552 ReductionCodeGen &RCG, unsigned N) {
5553 ASTContext &C = CGM.getContext();
5554 QualType VoidPtrTy = C.VoidPtrTy;
5555 VoidPtrTy.addRestrict();
5556 FunctionArgList Args;
5557 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5558 ImplicitParamKind::Other);
5559 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5560 ImplicitParamKind::Other);
5561 Args.emplace_back(&Param);
5562 Args.emplace_back(&ParamOrig);
5563 const auto &FnInfo =
5564 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5565 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5566 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5567 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5568 Name, &CGM.getModule());
5569 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5570 Fn->setDoesNotRecurse();
5571 CodeGenFunction CGF(CGM);
5572 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5573 QualType PrivateType = RCG.getPrivateType(N);
5574 Address PrivateAddr = CGF.EmitLoadOfPointer(
5575 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5576 C.getPointerType(PrivateType)->castAs<PointerType>());
5577 llvm::Value *Size = nullptr;
5578 // If the size of the reduction item is non-constant, load it from global
5579 // threadprivate variable.
5580 if (RCG.getSizes(N).second) {
5581 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5582 CGF, CGM.getContext().getSizeType(),
5583 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5584 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5585 CGM.getContext().getSizeType(), Loc);
5586 }
5587 RCG.emitAggregateType(CGF, N, Size);
5588 Address OrigAddr = Address::invalid();
5589 // If the initializer uses the initializer from a declare reduction
5590 // construct, emit a pointer to the address of the original reduction item
5591 // (required by the reduction initializer).
5592 if (RCG.usesReductionInitializer(N)) {
5593 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5594 OrigAddr = CGF.EmitLoadOfPointer(
5595 SharedAddr,
5596 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5597 }
5598 // Emit the initializer:
5599 // %0 = bitcast void* %arg to <type>*
5600 // store <type> <init>, <type>* %0
5601 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5602 [](CodeGenFunction &) { return false; });
5603 CGF.FinishFunction();
5604 return Fn;
5605}
5606
5607/// Emits reduction combiner function:
5608/// \code
5609/// void @.red_comb(void* %arg0, void* %arg1) {
5610/// %lhs = bitcast void* %arg0 to <type>*
5611/// %rhs = bitcast void* %arg1 to <type>*
5612/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5613/// store <type> %2, <type>* %lhs
5614/// ret void
5615/// }
5616/// \endcode
5617static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5618 SourceLocation Loc,
5619 ReductionCodeGen &RCG, unsigned N,
5620 const Expr *ReductionOp,
5621 const Expr *LHS, const Expr *RHS,
5622 const Expr *PrivateRef) {
5623 ASTContext &C = CGM.getContext();
5624 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5625 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5626 FunctionArgList Args;
5627 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5628 C.VoidPtrTy, ImplicitParamKind::Other);
5629 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5630 ImplicitParamKind::Other);
5631 Args.emplace_back(&ParamInOut);
5632 Args.emplace_back(&ParamIn);
5633 const auto &FnInfo =
5634 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5635 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5636 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5637 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5638 Name, &CGM.getModule());
5639 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5640 Fn->setDoesNotRecurse();
5641 CodeGenFunction CGF(CGM);
5642 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5643 llvm::Value *Size = nullptr;
5644 // If the size of the reduction item is non-constant, load it from global
5645 // threadprivate variable.
5646 if (RCG.getSizes(N).second) {
5647 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5648 CGF, CGM.getContext().getSizeType(),
5649 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5650 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5651 CGM.getContext().getSizeType(), Loc);
5652 }
5653 RCG.emitAggregateType(CGF, N, Size);
5654 // Remap lhs and rhs variables to the addresses of the function arguments.
5655 // %lhs = bitcast void* %arg0 to <type>*
5656 // %rhs = bitcast void* %arg1 to <type>*
5657 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5658 PrivateScope.addPrivate(
5659 LHSVD,
5660 // Pull out the pointer to the variable.
5661 CGF.EmitLoadOfPointer(
5662 CGF.GetAddrOfLocalVar(&ParamInOut)
5663 .withElementType(CGF.Builder.getPtrTy(0)),
5664 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5665 PrivateScope.addPrivate(
5666 RHSVD,
5667 // Pull out the pointer to the variable.
5668 CGF.EmitLoadOfPointer(
5669 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5670 CGF.Builder.getPtrTy(0)),
5671 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5672 PrivateScope.Privatize();
5673 // Emit the combiner body:
5674 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5675 // store <type> %2, <type>* %lhs
5676 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5677 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5678 cast<DeclRefExpr>(RHS));
5679 CGF.FinishFunction();
5680 return Fn;
5681}
5682
5683/// Emits reduction finalizer function:
5684/// \code
5685/// void @.red_fini(void* %arg) {
5686/// %0 = bitcast void* %arg to <type>*
5687/// <destroy>(<type>* %0)
5688/// ret void
5689/// }
5690/// \endcode
5691static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5692 SourceLocation Loc,
5693 ReductionCodeGen &RCG, unsigned N) {
5694 if (!RCG.needCleanups(N))
5695 return nullptr;
5696 ASTContext &C = CGM.getContext();
5697 FunctionArgList Args;
5698 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5699 ImplicitParamKind::Other);
5700 Args.emplace_back(&Param);
5701 const auto &FnInfo =
5702 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5703 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5704 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5705 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5706 Name, &CGM.getModule());
5707 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5708 Fn->setDoesNotRecurse();
5709 CodeGenFunction CGF(CGM);
5710 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5711 Address PrivateAddr = CGF.EmitLoadOfPointer(
5712 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5713 llvm::Value *Size = nullptr;
5714 // If the size of the reduction item is non-constant, load it from global
5715 // threadprivate variable.
5716 if (RCG.getSizes(N).second) {
5717 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5718 CGF, CGM.getContext().getSizeType(),
5719 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5720 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5721 CGM.getContext().getSizeType(), Loc);
5722 }
5723 RCG.emitAggregateType(CGF, N, Size);
5724 // Emit the finalizer body:
5725 // <destroy>(<type>* %0)
5726 RCG.emitCleanups(CGF, N, PrivateAddr);
5727 CGF.FinishFunction(Loc);
5728 return Fn;
5729}
5730
5731llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5732 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5733 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5734 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5735 return nullptr;
5736
5737 // Build typedef struct:
5738 // kmp_taskred_input {
5739 // void *reduce_shar; // shared reduction item
5740 // void *reduce_orig; // original reduction item used for initialization
5741 // size_t reduce_size; // size of data item
5742 // void *reduce_init; // data initialization routine
5743 // void *reduce_fini; // data finalization routine
5744 // void *reduce_comb; // data combiner routine
5745 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5746 // } kmp_taskred_input_t;
5747 ASTContext &C = CGM.getContext();
5748 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5749 RD->startDefinition();
5750 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5751 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5752 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5753 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5754 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5755 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5756 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5757 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5758 RD->completeDefinition();
5759 CanQualType RDType = C.getCanonicalTagType(RD);
5760 unsigned Size = Data.ReductionVars.size();
5761 llvm::APInt ArraySize(/*numBits=*/64, Size);
5762 QualType ArrayRDType =
5763 C.getConstantArrayType(RDType, ArraySize, nullptr,
5764 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5765 // kmp_task_red_input_t .rd_input.[Size];
5766 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5767 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5768 Data.ReductionCopies, Data.ReductionOps);
5769 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5770 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5771 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5772 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5773 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5774 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5775 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5776 ".rd_input.gep.");
5777 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5778 // ElemLVal.reduce_shar = &Shareds[Cnt];
5779 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5780 RCG.emitSharedOrigLValue(CGF, Cnt);
5781 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5782 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5783 // ElemLVal.reduce_orig = &Origs[Cnt];
5784 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5785 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5786 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5787 RCG.emitAggregateType(CGF, Cnt);
5788 llvm::Value *SizeValInChars;
5789 llvm::Value *SizeVal;
5790 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5791 // We use delayed creation/initialization for VLAs and array sections. It is
5792 // required because the runtime does not provide a way to pass the sizes of
5793 // VLAs/array sections to the initializer/combiner/finalizer functions.
5794 // Instead, threadprivate global variables are used to store these values,
5795 // which are then read inside those functions.
5796 bool DelayedCreation = !!SizeVal;
5797 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5798 /*isSigned=*/false);
5799 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5800 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5801 // ElemLVal.reduce_init = init;
5802 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5803 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5804 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5805 // ElemLVal.reduce_fini = fini;
5806 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5807 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5808 llvm::Value *FiniAddr =
5809 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5810 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5811 // ElemLVal.reduce_comb = comb;
5812 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5813 llvm::Value *CombAddr = emitReduceCombFunction(
5814 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5815 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5816 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5817 // ElemLVal.flags = 0;
5818 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5819 if (DelayedCreation) {
5820 CGF.EmitStoreOfScalar(
5821 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5822 FlagsLVal);
5823 } else
5824 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5825 }
5826 if (Data.IsReductionWithTaskMod) {
5827 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5828 // is_ws, int num, void *data);
5829 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5830 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5831 CGM.IntTy, /*isSigned=*/true);
5832 llvm::Value *Args[] = {
5833 IdentTLoc, GTid,
5834 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5835 /*isSigned=*/true),
5836 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5837 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5838 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5839 return CGF.EmitRuntimeCall(
5840 OMPBuilder.getOrCreateRuntimeFunction(
5841 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5842 Args);
5843 }
5844 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5845 llvm::Value *Args[] = {
5846 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5847 /*isSigned=*/true),
5848 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5849 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5850 CGM.VoidPtrTy)};
5851 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5852 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5853 Args);
5854}
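// Illustrative example (assumed user code): for
//   #pragma omp taskgroup task_reduction(+ : x)
// one kmp_taskred_input_t entry is filled with &x, its size, and the
// generated red_init/red_comb (plus red_fini when cleanups are needed)
// helpers, and the array is handed to __kmpc_taskred_init, or to
// __kmpc_taskred_modifier_init when the reduction has a task modifier.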
5855
5856void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5857 SourceLocation Loc,
5858 bool IsWorksharingReduction) {
5859 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5860 // is_ws, int num, void *data);
5861 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5862 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5863 CGM.IntTy, /*isSigned=*/true);
5864 llvm::Value *Args[] = {IdentTLoc, GTid,
5865 llvm::ConstantInt::get(CGM.IntTy,
5866 IsWorksharingReduction ? 1 : 0,
5867 /*isSigned=*/true)};
5868 (void)CGF.EmitRuntimeCall(
5869 OMPBuilder.getOrCreateRuntimeFunction(
5870 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5871 Args);
5872}
5873
5874void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5875 SourceLocation Loc,
5876 ReductionCodeGen &RCG,
5877 unsigned N) {
5878 auto Sizes = RCG.getSizes(N);
5879 // Emit the threadprivate global variable if the size is non-constant
5880 // (Sizes.second != nullptr).
5881 if (Sizes.second) {
5882 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5883 /*isSigned=*/false);
5884 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5885 CGF, CGM.getContext().getSizeType(),
5886 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5887 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5888 }
5889}
5890
5891Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5892 SourceLocation Loc,
5893 llvm::Value *ReductionsPtr,
5894 LValue SharedLVal) {
5895 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5896 // *d);
5897 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5898 CGM.IntTy,
5899 /*isSigned=*/true),
5900 ReductionsPtr,
5901 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5902 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5903 return Address(
5904 CGF.EmitRuntimeCall(
5905 OMPBuilder.getOrCreateRuntimeFunction(
5906 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5907 Args),
5908 CGF.Int8Ty, SharedLVal.getAlignment());
5909}
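// Illustrative sketch (not part of the original source): inside a task body,
// the per-thread copy of a reduction item 'x' is located with roughly
//
//   %x.priv = call ptr @__kmpc_task_reduction_get_th_data(i32 %gtid,
//                                                         ptr %tg, ptr %x)
//
// where %tg is the handle returned by the taskred init call emitted above and
// %x is the shared item's address; the SSA names are invented for the example.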
5910
5911void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5912 const OMPTaskDataTy &Data) {
5913 if (!CGF.HaveInsertPoint())
5914 return;
5915
5916 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5917 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5918 OMPBuilder.createTaskwait(CGF.Builder);
5919 } else {
5920 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5921 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5922 auto &M = CGM.getModule();
5923 Address DependenciesArray = Address::invalid();
5924 llvm::Value *NumOfElements;
5925 std::tie(NumOfElements, DependenciesArray) =
5926 emitDependClause(CGF, Data.Dependences, Loc);
5927 if (!Data.Dependences.empty()) {
5928 llvm::Value *DepWaitTaskArgs[7];
5929 DepWaitTaskArgs[0] = UpLoc;
5930 DepWaitTaskArgs[1] = ThreadID;
5931 DepWaitTaskArgs[2] = NumOfElements;
5932 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5933 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5934 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5935 DepWaitTaskArgs[6] =
5936 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5937
5938 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5939
5940 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5941 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5942 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5943 // kmp_int32 has_no_wait); if dependence info is specified.
5944 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5945 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5946 DepWaitTaskArgs);
5947
5948 } else {
5949
5950 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5951 // global_tid);
5952 llvm::Value *Args[] = {UpLoc, ThreadID};
5953 // Ignore return result until untied tasks are supported.
5954 CGF.EmitRuntimeCall(
5955 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5956 Args);
5957 }
5958 }
5959
5960 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5961 Region->emitUntiedSwitch(CGF);
5962}
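// Illustrative sketch (not part of the original source): a plain
// '#pragma omp taskwait' lowers to __kmpc_omp_taskwait(&loc, gtid) (or to
// OMPBuilder.createTaskwait when the OpenMPIRBuilder is enabled), while
//
//   #pragma omp taskwait depend(in : x)
//
// takes the dependence path above and emits roughly
//
//   call void @__kmpc_omp_taskwait_deps_51(ptr @loc, i32 %gtid, i32 1,
//                                          ptr %dep.arr, i32 0, ptr null,
//                                          i32 0)
//
// with the dependence array produced by emitDependClause; %dep.arr is an
// invented name.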
5963
5964void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5965 OpenMPDirectiveKind InnerKind,
5966 const RegionCodeGenTy &CodeGen,
5967 bool HasCancel) {
5968 if (!CGF.HaveInsertPoint())
5969 return;
5970 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5971 InnerKind != OMPD_critical &&
5972 InnerKind != OMPD_master &&
5973 InnerKind != OMPD_masked);
5974 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5975}
5976
5977namespace {
5978enum RTCancelKind {
5979 CancelNoreq = 0,
5980 CancelParallel = 1,
5981 CancelLoop = 2,
5982 CancelSections = 3,
5983 CancelTaskgroup = 4
5984};
5985} // anonymous namespace
5986
5987static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5988 RTCancelKind CancelKind = CancelNoreq;
5989 if (CancelRegion == OMPD_parallel)
5990 CancelKind = CancelParallel;
5991 else if (CancelRegion == OMPD_for)
5992 CancelKind = CancelLoop;
5993 else if (CancelRegion == OMPD_sections)
5994 CancelKind = CancelSections;
5995 else {
5996 assert(CancelRegion == OMPD_taskgroup);
5997 CancelKind = CancelTaskgroup;
5998 }
5999 return CancelKind;
6000}
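// For example, '#pragma omp cancel for' maps to CancelLoop (2) and
// '#pragma omp cancel taskgroup' maps to CancelTaskgroup (4); the resulting
// value is passed as the cncl_kind argument of the runtime calls emitted
// below.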
6001
6002void CGOpenMPRuntime::emitCancellationPointCall(
6003 CodeGenFunction &CGF, SourceLocation Loc,
6004 OpenMPDirectiveKind CancelRegion) {
6005 if (!CGF.HaveInsertPoint())
6006 return;
6007 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6008 // global_tid, kmp_int32 cncl_kind);
6009 if (auto *OMPRegionInfo =
6010 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6011 // For 'cancellation point taskgroup', the task region info may not have a
6012 // cancel. This may instead happen in another adjacent task.
6013 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6014 llvm::Value *Args[] = {
6015 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6016 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6017 // Ignore return result until untied tasks are supported.
6018 llvm::Value *Result = CGF.EmitRuntimeCall(
6019 OMPBuilder.getOrCreateRuntimeFunction(
6020 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6021 Args);
6022 // if (__kmpc_cancellationpoint()) {
6023 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6024 // exit from construct;
6025 // }
6026 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6027 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6028 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6029 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6030 CGF.EmitBlock(ExitBB);
6031 if (CancelRegion == OMPD_parallel)
6032 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6033 // exit from construct;
6034 CodeGenFunction::JumpDest CancelDest =
6035 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6036 CGF.EmitBranchThroughCleanup(CancelDest);
6037 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6038 }
6039 }
6040}
6041
6042void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6043 const Expr *IfCond,
6044 OpenMPDirectiveKind CancelRegion) {
6045 if (!CGF.HaveInsertPoint())
6046 return;
6047 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6048 // kmp_int32 cncl_kind);
6049 auto &M = CGM.getModule();
6050 if (auto *OMPRegionInfo =
6051 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6052 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6053 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6054 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6055 llvm::Value *Args[] = {
6056 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6057 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6058 // Ignore return result until untied tasks are supported.
6059 llvm::Value *Result = CGF.EmitRuntimeCall(
6060 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6061 // if (__kmpc_cancel()) {
6062 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6063 // exit from construct;
6064 // }
6065 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6066 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6067 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6068 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6069 CGF.EmitBlock(ExitBB);
6070 if (CancelRegion == OMPD_parallel)
6071 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6072 // exit from construct;
6073 CodeGenFunction::JumpDest CancelDest =
6074 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6075 CGF.EmitBranchThroughCleanup(CancelDest);
6076 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6077 };
6078 if (IfCond) {
6079 emitIfClause(CGF, IfCond, ThenGen,
6080 [](CodeGenFunction &, PrePostActionTy &) {});
6081 } else {
6082 RegionCodeGenTy ThenRCG(ThenGen);
6083 ThenRCG(CGF);
6084 }
6085 }
6086}
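// Illustrative sketch (not part of the original source): for
//
//   #pragma omp cancel parallel if(c)
//
// emitIfClause guards ThenGen so __kmpc_cancel is only reached when 'c' is
// true. A nonzero return branches to .cancel.exit, where a cancellation
// barrier is emitted (for parallel regions only) before leaving the construct
// through the cleanup destination; a zero return falls through to
// .cancel.continue, matching the control flow sketched in the comments above.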
6087
6088namespace {
6089/// Cleanup action for uses_allocators support.
6090class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6091 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6092
6093public:
6094 OMPUsesAllocatorsActionTy(
6095 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6096 : Allocators(Allocators) {}
6097 void Enter(CodeGenFunction &CGF) override {
6098 if (!CGF.HaveInsertPoint())
6099 return;
6100 for (const auto &AllocatorData : Allocators) {
6101 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6102 CGF, AllocatorData.first, AllocatorData.second);
6103 }
6104 }
6105 void Exit(CodeGenFunction &CGF) override {
6106 if (!CGF.HaveInsertPoint())
6107 return;
6108 for (const auto &AllocatorData : Allocators) {
6109 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6110 AllocatorData.first);
6111 }
6112 }
6113};
6114} // namespace
6115
6116void CGOpenMPRuntime::emitTargetOutlinedFunction(
6117 const OMPExecutableDirective &D, StringRef ParentName,
6118 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6119 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6120 assert(!ParentName.empty() && "Invalid target entry parent name!");
6121 HasEmittedTargetRegion = true;
6122 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6123 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6124 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6125 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6126 if (!D.AllocatorTraits)
6127 continue;
6128 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6129 }
6130 }
6131 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6132 CodeGen.setAction(UsesAllocatorAction);
6133 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6134 IsOffloadEntry, CodeGen);
6135}
6136
6137void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6138 const Expr *Allocator,
6139 const Expr *AllocatorTraits) {
6140 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6141 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6142 // Use default memspace handle.
6143 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6144 llvm::Value *NumTraits = llvm::ConstantInt::get(
6145 CGF.IntTy, cast<ConstantArrayType>(
6146 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6147 ->getSize()
6148 .getLimitedValue());
6149 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6150 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6151 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6152 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6153 AllocatorTraitsLVal.getBaseInfo(),
6154 AllocatorTraitsLVal.getTBAAInfo());
6155 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6156
6157 llvm::Value *AllocatorVal =
6158 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6159 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6160 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6161 // Store to allocator.
6162 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
6163 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6164 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6165 AllocatorVal =
6166 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6167 Allocator->getType(), Allocator->getExprLoc());
6168 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6169}
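// Illustrative sketch (not part of the original source): for a clause such as
//
//   #pragma omp target uses_allocators(my_alloc(my_traits))
//
// the code above loads the traits array address and emits roughly
//
//   %a = call ptr @__kmpc_init_allocator(i32 %gtid, ptr null, i32 N,
//                                        ptr %my_traits)
//
// where N is the constant length of 'my_traits', then stores the converted
// result into the 'my_alloc' variable. 'my_alloc' and 'my_traits' are invented
// names for the example.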
6170
6171void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6172 const Expr *Allocator) {
6173 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6174 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6175 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6176 llvm::Value *AllocatorVal =
6177 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6178 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6179 CGF.getContext().VoidPtrTy,
6180 Allocator->getExprLoc());
6181 (void)CGF.EmitRuntimeCall(
6182 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6183 OMPRTL___kmpc_destroy_allocator),
6184 {ThreadId, AllocatorVal});
6185}
6186
6187void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6188 const OMPExecutableDirective &D, CodeGenFunction &CGF,
6189 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6190 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6191 "invalid default attrs structure");
6192 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6193 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6194
6195 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6196 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6197 /*UpperBoundOnly=*/true);
6198
6199 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6200 for (auto *A : C->getAttrs()) {
6201 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6202 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6203 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6204 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6205 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6206 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6207 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6208 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6209 &AttrMaxThreadsVal);
6210 else
6211 continue;
6212
6213 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6214 if (AttrMaxThreadsVal > 0)
6215 MaxThreadsVal = MaxThreadsVal > 0
6216 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6217 : AttrMaxThreadsVal;
6218 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6219 if (AttrMaxBlocksVal > 0)
6220 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6221 : AttrMaxBlocksVal;
6222 }
6223 }
6224}
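// Illustrative sketch (not part of the original source): if the clauses yield
// MaxThreads = 256 and an ompx_attribute carries CUDA launch bounds
// (maxThreadsPerBlock = 128, minBlocksPerMultiprocessor = 2), the loop above
// tightens the defaults to MaxThreads = min(256, 128) = 128 and
// MinTeams = max(1, 2) = 2; a value of -1 means "no bound" and is only ever
// replaced, never used to relax an existing bound.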
6225
6226void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6227 const OMPExecutableDirective &D, StringRef ParentName,
6228 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6229 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6230
6231 llvm::TargetRegionEntryInfo EntryInfo =
6232 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6233
6234 CodeGenFunction CGF(CGM, true);
6235 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6236 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6237 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6238
6239 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6240 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6241 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6242 };
6243
6244 cantFail(OMPBuilder.emitTargetRegionFunction(
6245 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6246 OutlinedFnID));
6247
6248 if (!OutlinedFn)
6249 return;
6250
6251 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6252
6253 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6254 for (auto *A : C->getAttrs()) {
6255 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6256 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6257 }
6258 }
6259}
6260
6261/// Checks if the expression is constant or does not have non-trivial function
6262/// calls.
6263static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6264 // We can skip constant expressions.
6265 // We can skip expressions with trivial calls or simple expressions.
6266 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6267 !E->hasNonTrivialCall(Ctx)) &&
6268 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6269}
6270
6271const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6272 const Stmt *Body) {
6273 const Stmt *Child = Body->IgnoreContainers();
6274 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6275 Child = nullptr;
6276 for (const Stmt *S : C->body()) {
6277 if (const auto *E = dyn_cast<Expr>(S)) {
6278 if (isTrivial(Ctx, E))
6279 continue;
6280 }
6281 // Some of the statements can be ignored.
6282 if (isa<ImplicitCastExpr>(S) || isa<CXXDefaultArgExpr>(S) ||
6283 isa<CXXBindTemporaryExpr>(S))
6284 continue;
6285 // Analyze declarations.
6286 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6287 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6288 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6289 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6290 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6291 isa<UsingDirectiveDecl>(D) ||
6292 isa<OMPDeclareReductionDecl>(D) ||
6293 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6294 return true;
6295 const auto *VD = dyn_cast<VarDecl>(D);
6296 if (!VD)
6297 return false;
6298 return VD->hasGlobalStorage() || !VD->isUsed();
6299 }))
6300 continue;
6301 }
6302 // Found multiple children - cannot get the one child only.
6303 if (Child)
6304 return nullptr;
6305 Child = S;
6306 }
6307 if (Child)
6308 Child = Child->IgnoreContainers();
6309 }
6310 return Child;
6311}
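// Illustrative sketch (not part of the original source): for a body like
//
//   { int unused; work(); }
//
// the walk above skips the unused local declaration and returns the 'work()'
// call as the single child; if a second non-ignorable statement followed, the
// function would return nullptr instead.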
6312
6313const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6314 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6315 int32_t &MaxTeamsVal) {
6316
6317 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6318 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6319 "Expected target-based executable directive.");
6320 switch (DirectiveKind) {
6321 case OMPD_target: {
6322 const auto *CS = D.getInnermostCapturedStmt();
6323 const auto *Body =
6324 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6325 const Stmt *ChildStmt =
6326 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6327 if (const auto *NestedDir =
6328 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6329 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6330 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6331 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6332 ->getNumTeams()
6333 .front();
6334 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6335 if (auto Constant =
6336 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6337 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6338 return NumTeams;
6339 }
6340 MinTeamsVal = MaxTeamsVal = 0;
6341 return nullptr;
6342 }
6343 MinTeamsVal = MaxTeamsVal = 1;
6344 return nullptr;
6345 }
6346 // A value of -1 is used to check if we need to emit no teams region
6347 MinTeamsVal = MaxTeamsVal = -1;
6348 return nullptr;
6349 }
6350 case OMPD_target_teams_loop:
6351 case OMPD_target_teams:
6352 case OMPD_target_teams_distribute:
6353 case OMPD_target_teams_distribute_simd:
6354 case OMPD_target_teams_distribute_parallel_for:
6355 case OMPD_target_teams_distribute_parallel_for_simd: {
6356 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6357 const Expr *NumTeams =
6358 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6359 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6360 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6361 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6362 return NumTeams;
6363 }
6364 MinTeamsVal = MaxTeamsVal = 0;
6365 return nullptr;
6366 }
6367 case OMPD_target_parallel:
6368 case OMPD_target_parallel_for:
6369 case OMPD_target_parallel_for_simd:
6370 case OMPD_target_parallel_loop:
6371 case OMPD_target_simd:
6372 MinTeamsVal = MaxTeamsVal = 1;
6373 return nullptr;
6374 case OMPD_parallel:
6375 case OMPD_for:
6376 case OMPD_parallel_for:
6377 case OMPD_parallel_loop:
6378 case OMPD_parallel_master:
6379 case OMPD_parallel_sections:
6380 case OMPD_for_simd:
6381 case OMPD_parallel_for_simd:
6382 case OMPD_cancel:
6383 case OMPD_cancellation_point:
6384 case OMPD_ordered:
6385 case OMPD_threadprivate:
6386 case OMPD_allocate:
6387 case OMPD_task:
6388 case OMPD_simd:
6389 case OMPD_tile:
6390 case OMPD_unroll:
6391 case OMPD_sections:
6392 case OMPD_section:
6393 case OMPD_single:
6394 case OMPD_master:
6395 case OMPD_critical:
6396 case OMPD_taskyield:
6397 case OMPD_barrier:
6398 case OMPD_taskwait:
6399 case OMPD_taskgroup:
6400 case OMPD_atomic:
6401 case OMPD_flush:
6402 case OMPD_depobj:
6403 case OMPD_scan:
6404 case OMPD_teams:
6405 case OMPD_target_data:
6406 case OMPD_target_exit_data:
6407 case OMPD_target_enter_data:
6408 case OMPD_distribute:
6409 case OMPD_distribute_simd:
6410 case OMPD_distribute_parallel_for:
6411 case OMPD_distribute_parallel_for_simd:
6412 case OMPD_teams_distribute:
6413 case OMPD_teams_distribute_simd:
6414 case OMPD_teams_distribute_parallel_for:
6415 case OMPD_teams_distribute_parallel_for_simd:
6416 case OMPD_target_update:
6417 case OMPD_declare_simd:
6418 case OMPD_declare_variant:
6419 case OMPD_begin_declare_variant:
6420 case OMPD_end_declare_variant:
6421 case OMPD_declare_target:
6422 case OMPD_end_declare_target:
6423 case OMPD_declare_reduction:
6424 case OMPD_declare_mapper:
6425 case OMPD_taskloop:
6426 case OMPD_taskloop_simd:
6427 case OMPD_master_taskloop:
6428 case OMPD_master_taskloop_simd:
6429 case OMPD_parallel_master_taskloop:
6430 case OMPD_parallel_master_taskloop_simd:
6431 case OMPD_requires:
6432 case OMPD_metadirective:
6433 case OMPD_unknown:
6434 break;
6435 default:
6436 break;
6437 }
6438 llvm_unreachable("Unexpected directive kind.");
6439}
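// Illustrative sketch (not part of the original source): for
//
//   #pragma omp target teams num_teams(4)
//
// the target_teams case above returns the num_teams expression and folds it
// into MinTeamsVal = MaxTeamsVal = 4; for a non-constant num_teams(n) the
// bounds are left as-is and 'n' is returned so that
// emitNumTeamsForTargetDirective can evaluate it at runtime.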
6440
6441llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6442 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6443 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6444 "Clauses associated with the teams directive expected to be emitted "
6445 "only for the host!");
6446 CGBuilderTy &Bld = CGF.Builder;
6447 int32_t MinNT = -1, MaxNT = -1;
6448 const Expr *NumTeams =
6449 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6450 if (NumTeams != nullptr) {
6451 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6452
6453 switch (DirectiveKind) {
6454 case OMPD_target: {
6455 const auto *CS = D.getInnermostCapturedStmt();
6456 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6457 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6458 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6459 /*IgnoreResultAssign*/ true);
6460 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6461 /*isSigned=*/true);
6462 }
6463 case OMPD_target_teams:
6464 case OMPD_target_teams_distribute:
6465 case OMPD_target_teams_distribute_simd:
6466 case OMPD_target_teams_distribute_parallel_for:
6467 case OMPD_target_teams_distribute_parallel_for_simd: {
6468 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6469 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6470 /*IgnoreResultAssign*/ true);
6471 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6472 /*isSigned=*/true);
6473 }
6474 default:
6475 break;
6476 }
6477 }
6478
6479 assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6480 return llvm::ConstantInt::getSigned(CGF.Int32Ty, MinNT);
6481}
6482
6483 /// Check for a num threads constant value (stored in \p UpperBound), or
6484 /// expression (stored in \p E). If the value is conditional (via an if-clause),
6485 /// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
6486 /// nullptr, no expression evaluation is performed.
6487static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6488 const Expr **E, int32_t &UpperBound,
6489 bool UpperBoundOnly, llvm::Value **CondVal) {
6490 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6491 CGF.getContext(), CS->getCapturedStmt());
6492 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6493 if (!Dir)
6494 return;
6495
6496 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6497 // Handle if clause. If if clause present, the number of threads is
6498 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6499 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6500 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6501 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6502 const OMPIfClause *IfClause = nullptr;
6503 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6504 if (C->getNameModifier() == OMPD_unknown ||
6505 C->getNameModifier() == OMPD_parallel) {
6506 IfClause = C;
6507 break;
6508 }
6509 }
6510 if (IfClause) {
6511 const Expr *CondExpr = IfClause->getCondition();
6512 bool Result;
6513 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6514 if (!Result) {
6515 UpperBound = 1;
6516 return;
6517 }
6518 } else {
6519 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6520 if (const auto *PreInit =
6521 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6522 for (const auto *I : PreInit->decls()) {
6523 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6524 CGF.EmitVarDecl(cast<VarDecl>(*I));
6525 } else {
6526 CodeGenFunction::AutoVarEmission Emission =
6527 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6528 CGF.EmitAutoVarCleanups(Emission);
6529 }
6530 }
6531 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6532 }
6533 }
6534 }
6535 }
6536 // Check the value of the num_threads clause iff the if clause was not
6537 // specified or does not evaluate to false.
6538 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6539 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6540 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6541 const auto *NumThreadsClause =
6542 Dir->getSingleClause<OMPNumThreadsClause>();
6543 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6544 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6545 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6546 UpperBound =
6547 UpperBound
6548 ? Constant->getZExtValue()
6549 : std::min(UpperBound,
6550 static_cast<int32_t>(Constant->getZExtValue()));
6551 // If we haven't found an upper bound, remember we saw a thread limiting
6552 // clause.
6553 if (UpperBound == -1)
6554 UpperBound = 0;
6555 if (!E)
6556 return;
6557 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6558 if (const auto *PreInit =
6559 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6560 for (const auto *I : PreInit->decls()) {
6561 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6562 CGF.EmitVarDecl(cast<VarDecl>(*I));
6563 } else {
6564 CodeGenFunction::AutoVarEmission Emission =
6565 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6566 CGF.EmitAutoVarCleanups(Emission);
6567 }
6568 }
6569 }
6570 *E = NTExpr;
6571 }
6572 return;
6573 }
6574 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6575 UpperBound = 1;
6576}
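// Illustrative sketch (not part of the original source): for a nested
//
//   #pragma omp parallel if(c) num_threads(8)
//
// a 'c' that folds to false short-circuits to UpperBound = 1; otherwise the
// boolean is emitted into *CondVal and the num_threads expression is recorded,
// so the caller can materialize c ? 8 : 1 as described in the comment at the
// top of this function.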
6577
6578const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6579 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6580 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6581 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6582 "Clauses associated with the teams directive expected to be emitted "
6583 "only for the host!");
6584 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6585 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6586 "Expected target-based executable directive.");
6587
6588 const Expr *NT = nullptr;
6589 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6590
6591 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6592 if (E->isIntegerConstantExpr(CGF.getContext())) {
6593 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6594 UpperBound = UpperBound ? Constant->getZExtValue()
6595 : std::min(UpperBound,
6596 int32_t(Constant->getZExtValue()));
6597 }
6598 // If we haven't found an upper bound, remember we saw a thread limiting
6599 // clause.
6600 if (UpperBound == -1)
6601 UpperBound = 0;
6602 if (EPtr)
6603 *EPtr = E;
6604 };
6605
6606 auto ReturnSequential = [&]() {
6607 UpperBound = 1;
6608 return NT;
6609 };
6610
6611 switch (DirectiveKind) {
6612 case OMPD_target: {
6613 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6614 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6615 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6616 CGF.getContext(), CS->getCapturedStmt());
6617 // TODO: The standard is not clear how to resolve two thread limit clauses,
6618 // let's pick the teams one if it's present, otherwise the target one.
6619 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6620 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6621 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6622 ThreadLimitClause = TLC;
6623 if (ThreadLimitExpr) {
6624 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6625 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6626 CodeGenFunction::LexicalScope Scope(
6627 CGF,
6628 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6629 if (const auto *PreInit =
6630 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6631 for (const auto *I : PreInit->decls()) {
6632 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6633 CGF.EmitVarDecl(cast<VarDecl>(*I));
6634 } else {
6635 CodeGenFunction::AutoVarEmission Emission =
6636 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6637 CGF.EmitAutoVarCleanups(Emission);
6638 }
6639 }
6640 }
6641 }
6642 }
6643 }
6644 if (ThreadLimitClause)
6645 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6646 ThreadLimitExpr);
6647 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6648 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6649 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6650 CS = Dir->getInnermostCapturedStmt();
6651 Child = CGOpenMPRuntime::getSingleCompoundChild(
6652 CGF.getContext(), CS->getCapturedStmt());
6653 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6654 }
6655 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6656 CS = Dir->getInnermostCapturedStmt();
6657 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6658 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6659 return ReturnSequential();
6660 }
6661 return NT;
6662 }
6663 case OMPD_target_teams: {
6664 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6665 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6666 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6667 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6668 ThreadLimitExpr);
6669 }
6670 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6671 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6672 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6673 CGF.getContext(), CS->getCapturedStmt());
6674 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6675 if (Dir->getDirectiveKind() == OMPD_distribute) {
6676 CS = Dir->getInnermostCapturedStmt();
6677 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6678 }
6679 }
6680 return NT;
6681 }
6682 case OMPD_target_teams_distribute:
6683 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6684 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6685 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6686 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6687 ThreadLimitExpr);
6688 }
6689 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6690 UpperBoundOnly, CondVal);
6691 return NT;
6692 case OMPD_target_teams_loop:
6693 case OMPD_target_parallel_loop:
6694 case OMPD_target_parallel:
6695 case OMPD_target_parallel_for:
6696 case OMPD_target_parallel_for_simd:
6697 case OMPD_target_teams_distribute_parallel_for:
6698 case OMPD_target_teams_distribute_parallel_for_simd: {
6699 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6700 const OMPIfClause *IfClause = nullptr;
6701 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6702 if (C->getNameModifier() == OMPD_unknown ||
6703 C->getNameModifier() == OMPD_parallel) {
6704 IfClause = C;
6705 break;
6706 }
6707 }
6708 if (IfClause) {
6709 const Expr *Cond = IfClause->getCondition();
6710 bool Result;
6711 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6712 if (!Result)
6713 return ReturnSequential();
6714 } else {
6715 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6716 *CondVal = CGF.EvaluateExprAsBool(Cond);
6717 }
6718 }
6719 }
6720 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6721 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6722 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6723 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6724 ThreadLimitExpr);
6725 }
6726 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6727 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6728 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6729 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6730 return NumThreadsClause->getNumThreads();
6731 }
6732 return NT;
6733 }
6734 case OMPD_target_teams_distribute_simd:
6735 case OMPD_target_simd:
6736 return ReturnSequential();
6737 default:
6738 break;
6739 }
6740 llvm_unreachable("Unsupported directive kind.");
6741}
6742
6743llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6744 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6745 llvm::Value *NumThreadsVal = nullptr;
6746 llvm::Value *CondVal = nullptr;
6747 llvm::Value *ThreadLimitVal = nullptr;
6748 const Expr *ThreadLimitExpr = nullptr;
6749 int32_t UpperBound = -1;
6750
6751 const Expr *NT = getNumThreadsExprForTargetDirective(
6752 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6753 &ThreadLimitExpr);
6754
6755 // Thread limit expressions are used below, emit them.
6756 if (ThreadLimitExpr) {
6757 ThreadLimitVal =
6758 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6759 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6760 /*isSigned=*/false);
6761 }
6762
6763 // Generate the num threads expression.
6764 if (UpperBound == 1) {
6765 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6766 } else if (NT) {
6767 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6768 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6769 /*isSigned=*/false);
6770 } else if (ThreadLimitVal) {
6771 // If we do not have a num threads value but a thread limit, replace the
6772 // former with the latter. We have already handled the thread limit expression.
6773 NumThreadsVal = ThreadLimitVal;
6774 ThreadLimitVal = nullptr;
6775 } else {
6776 // Default to "0" which means runtime choice.
6777 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6778 NumThreadsVal = CGF.Builder.getInt32(0);
6779 }
6780
6781 // Handle if clause. If if clause present, the number of threads is
6782 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6783 if (CondVal) {
6784 CodeGenFunction::RunCleanupsScope IfScope(CGF);
6785 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6786 CGF.Builder.getInt32(1));
6787 }
6788
6789 // If the thread limit and num threads expressions were both present, take
6790 // the minimum.
6791 if (ThreadLimitVal) {
6792 NumThreadsVal = CGF.Builder.CreateSelect(
6793 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6794 ThreadLimitVal, NumThreadsVal);
6795 }
6796
6797 return NumThreadsVal;
6798}
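// Illustrative sketch (not part of the original source): for
//
//   #pragma omp target parallel thread_limit(64) num_threads(n)
//
// the value computed above is min(64, n) via the unsigned compare-and-select;
// adding if(c) yields min(64, c ? n : 1), and with no relevant clauses the
// result is 0, which lets the runtime pick the thread count.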
6799
6800namespace {
6801LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6802
6803// Utility to handle information from clauses associated with a given
6804// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6805// It provides a convenient interface to obtain the information and generate
6806// code for that information.
6807class MappableExprsHandler {
6808public:
6809 /// Custom comparator for attach-pointer expressions that compares them by
6810 /// complexity (i.e. their component-depth) first, then by the order in which
6811 /// they were computed by collectAttachPtrExprInfo(), if they are semantically
6812 /// different.
6813 struct AttachPtrExprComparator {
6814 const MappableExprsHandler &Handler;
6815 // Cache of previous equality comparison results.
6816 mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
6817 CachedEqualityComparisons;
6818
6819 AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
6820 AttachPtrExprComparator() = delete;
6821
6822 // Return true iff LHS is "less than" RHS.
6823 bool operator()(const Expr *LHS, const Expr *RHS) const {
6824 if (LHS == RHS)
6825 return false;
6826
6827 // First, compare by complexity (depth)
6828 const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
6829 const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
6830
6831 std::optional<size_t> DepthLHS =
6832 (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
6833 : std::nullopt;
6834 std::optional<size_t> DepthRHS =
6835 (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
6836 : std::nullopt;
6837
6838 // std::nullopt (no attach pointer) has lowest complexity
6839 if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
6840 // Both have same complexity, now check semantic equality
6841 if (areEqual(LHS, RHS))
6842 return false;
6843 // Different semantically, compare by computation order
6844 return wasComputedBefore(LHS, RHS);
6845 }
6846 if (!DepthLHS.has_value())
6847 return true; // LHS has lower complexity
6848 if (!DepthRHS.has_value())
6849 return false; // RHS has lower complexity
6850
6851 // Both have values, compare by depth (lower depth = lower complexity)
6852 if (DepthLHS.value() != DepthRHS.value())
6853 return DepthLHS.value() < DepthRHS.value();
6854
6855 // Same complexity, now check semantic equality
6856 if (areEqual(LHS, RHS))
6857 return false;
6858 // Different semantically, compare by computation order
6859 return wasComputedBefore(LHS, RHS);
6860 }
6861
6862 public:
6863 /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
6864 /// results, if available, otherwise does a recursive semantic comparison.
6865 bool areEqual(const Expr *LHS, const Expr *RHS) const {
6866 // Check cache first for faster lookup
6867 const auto CachedResultIt = CachedEqualityComparisons.find({LHS, RHS});
6868 if (CachedResultIt != CachedEqualityComparisons.end())
6869 return CachedResultIt->second;
6870
6871 bool ComparisonResult = areSemanticallyEqual(LHS, RHS);
6872
6873 // Cache the result for future lookups (both orders since semantic
6874 // equality is commutative)
6875 CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
6876 CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
6877 return ComparisonResult;
6878 }
6879
6880 /// Compare the two attach-ptr expressions by their computation order.
6881 /// Returns true iff LHS was computed before RHS by
6882 /// collectAttachPtrExprInfo().
6883 bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
6884 const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
6885 const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
6886
6887 return OrderLHS < OrderRHS;
6888 }
6889
6890 private:
6891 /// Helper function to compare attach-pointer expressions semantically.
6892 /// This function handles various expression types that can be part of an
6893 /// attach-pointer.
6894 /// TODO: Not urgent, but we should ideally return true when comparing
6895 /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
6896 bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
6897 if (LHS == RHS)
6898 return true;
6899
6900 // If only one is null, they aren't equal
6901 if (!LHS || !RHS)
6902 return false;
6903
6904 ASTContext &Ctx = Handler.CGF.getContext();
6905 // Strip away parentheses and no-op casts to get to the core expression
6906 LHS = LHS->IgnoreParenNoopCasts(Ctx);
6907 RHS = RHS->IgnoreParenNoopCasts(Ctx);
6908
6909 // Direct pointer comparison of the underlying expressions
6910 if (LHS == RHS)
6911 return true;
6912
6913 // Check if the expression classes match
6914 if (LHS->getStmtClass() != RHS->getStmtClass())
6915 return false;
6916
6917 // Handle DeclRefExpr (variable references)
6918 if (const auto *LD = dyn_cast<DeclRefExpr>(LHS)) {
6919 const auto *RD = dyn_cast<DeclRefExpr>(RHS);
6920 if (!RD)
6921 return false;
6922 return LD->getDecl()->getCanonicalDecl() ==
6923 RD->getDecl()->getCanonicalDecl();
6924 }
6925
6926 // Handle ArraySubscriptExpr (array indexing like a[i])
6927 if (const auto *LA = dyn_cast<ArraySubscriptExpr>(LHS)) {
6928 const auto *RA = dyn_cast<ArraySubscriptExpr>(RHS);
6929 if (!RA)
6930 return false;
6931 return areSemanticallyEqual(LA->getBase(), RA->getBase()) &&
6932 areSemanticallyEqual(LA->getIdx(), RA->getIdx());
6933 }
6934
6935 // Handle MemberExpr (member access like s.m or p->m)
6936 if (const auto *LM = dyn_cast<MemberExpr>(LHS)) {
6937 const auto *RM = dyn_cast<MemberExpr>(RHS);
6938 if (!RM)
6939 return false;
6940 if (LM->getMemberDecl()->getCanonicalDecl() !=
6941 RM->getMemberDecl()->getCanonicalDecl())
6942 return false;
6943 return areSemanticallyEqual(LM->getBase(), RM->getBase());
6944 }
6945
6946 // Handle UnaryOperator (unary operations like *p, &x, etc.)
6947 if (const auto *LU = dyn_cast<UnaryOperator>(LHS)) {
6948 const auto *RU = dyn_cast<UnaryOperator>(RHS);
6949 if (!RU)
6950 return false;
6951 if (LU->getOpcode() != RU->getOpcode())
6952 return false;
6953 return areSemanticallyEqual(LU->getSubExpr(), RU->getSubExpr());
6954 }
6955
6956 // Handle BinaryOperator (binary operations like p + offset)
6957 if (const auto *LB = dyn_cast<BinaryOperator>(LHS)) {
6958 const auto *RB = dyn_cast<BinaryOperator>(RHS);
6959 if (!RB)
6960 return false;
6961 if (LB->getOpcode() != RB->getOpcode())
6962 return false;
6963 return areSemanticallyEqual(LB->getLHS(), RB->getLHS()) &&
6964 areSemanticallyEqual(LB->getRHS(), RB->getRHS());
6965 }
6966
6967 // Handle ArraySectionExpr (array sections like a[0:1])
6968 // Attach pointers should not contain array-sections, but currently we
6969 // don't emit an error.
6970 if (const auto *LAS = dyn_cast<ArraySectionExpr>(LHS)) {
6971 const auto *RAS = dyn_cast<ArraySectionExpr>(RHS);
6972 if (!RAS)
6973 return false;
6974 return areSemanticallyEqual(LAS->getBase(), RAS->getBase()) &&
6975 areSemanticallyEqual(LAS->getLowerBound(),
6976 RAS->getLowerBound()) &&
6977 areSemanticallyEqual(LAS->getLength(), RAS->getLength());
6978 }
6979
6980 // Handle CastExpr (explicit casts)
6981 if (const auto *LC = dyn_cast<CastExpr>(LHS)) {
6982 const auto *RC = dyn_cast<CastExpr>(RHS);
6983 if (!RC)
6984 return false;
6985 if (LC->getCastKind() != RC->getCastKind())
6986 return false;
6987 return areSemanticallyEqual(LC->getSubExpr(), RC->getSubExpr());
6988 }
6989
6990 // Handle CXXThisExpr (this pointer)
6991 if (isa<CXXThisExpr>(LHS) && isa<CXXThisExpr>(RHS))
6992 return true;
6993
6994 // Handle IntegerLiteral (integer constants)
6995 if (const auto *LI = dyn_cast<IntegerLiteral>(LHS)) {
6996 const auto *RI = dyn_cast<IntegerLiteral>(RHS);
6997 if (!RI)
6998 return false;
6999 return LI->getValue() == RI->getValue();
7000 }
7001
7002 // Handle CharacterLiteral (character constants)
7003 if (const auto *LC = dyn_cast<CharacterLiteral>(LHS)) {
7004 const auto *RC = dyn_cast<CharacterLiteral>(RHS);
7005 if (!RC)
7006 return false;
7007 return LC->getValue() == RC->getValue();
7008 }
7009
7010 // Handle FloatingLiteral (floating point constants)
7011 if (const auto *LF = dyn_cast<FloatingLiteral>(LHS)) {
7012 const auto *RF = dyn_cast<FloatingLiteral>(RHS);
7013 if (!RF)
7014 return false;
7015 // Use bitwise comparison for floating point literals
7016 return LF->getValue().bitwiseIsEqual(RF->getValue());
7017 }
7018
7019 // Handle StringLiteral (string constants)
7020 if (const auto *LS = dyn_cast<StringLiteral>(LHS)) {
7021 const auto *RS = dyn_cast<StringLiteral>(RHS);
7022 if (!RS)
7023 return false;
7024 return LS->getString() == RS->getString();
7025 }
7026
7027 // Handle CXXNullPtrLiteralExpr (nullptr)
7028 if (isa<CXXNullPtrLiteralExpr>(LHS) && isa<CXXNullPtrLiteralExpr>(RHS))
7029 return true;
7030
7031 // Handle CXXBoolLiteralExpr (true/false)
7032 if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(LHS)) {
7033 const auto *RB = dyn_cast<CXXBoolLiteralExpr>(RHS);
7034 if (!RB)
7035 return false;
7036 return LB->getValue() == RB->getValue();
7037 }
7038
7039 // Fallback for other forms - use the existing comparison method
7040 return Expr::isSameComparisonOperand(LHS, RHS);
7041 }
7042 };
7043
7044 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7045 static unsigned getFlagMemberOffset() {
7046 unsigned Offset = 0;
7047 for (uint64_t Remain =
7048 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7049 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7050 !(Remain & 1); Remain = Remain >> 1)
7051 Offset++;
7052 return Offset;
7053 }
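// Illustrative sketch (not part of the original source): OMP_MAP_MEMBER_OF
// occupies the high 16 bits of the 64-bit flag word, so this loop returns the
// number of trailing zero bits in its mask (48); a member position 'p' is then
// encoded into the flags as ((p + 1) << getFlagMemberOffset()).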
7054
7055 /// Class that holds debugging information for a data mapping to be passed to
7056 /// the runtime library.
7057 class MappingExprInfo {
7058 /// The variable declaration used for the data mapping.
7059 const ValueDecl *MapDecl = nullptr;
7060 /// The original expression used in the map clause, or null if there is
7061 /// none.
7062 const Expr *MapExpr = nullptr;
7063
7064 public:
7065 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7066 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7067
7068 const ValueDecl *getMapDecl() const { return MapDecl; }
7069 const Expr *getMapExpr() const { return MapExpr; }
7070 };
7071
7072 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7073 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7074 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7075 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7076 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7077 using MapNonContiguousArrayTy =
7078 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7079 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7080 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7081 using MapData =
7082 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7083 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7084 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7085 using MapDataArrayTy = SmallVector<MapData, 4>;
7086
7087 /// This structure contains combined information generated for mappable
7088 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7089 /// mappers, and non-contiguous information.
7090 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7091 MapExprsArrayTy Exprs;
7092 MapValueDeclsArrayTy Mappers;
7093 MapValueDeclsArrayTy DevicePtrDecls;
7094
7095 /// Append arrays in \a CurInfo.
7096 void append(MapCombinedInfoTy &CurInfo) {
7097 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7098 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
7099 CurInfo.DevicePtrDecls.end());
7100 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7101 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7102 }
7103 };
7104
7105 /// Map between a struct and its lowest & highest elements which have been
7106 /// mapped.
7107 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7108 /// HE(FieldIndex, Pointer)}
7109 struct StructRangeInfoTy {
7110 MapCombinedInfoTy PreliminaryMapData;
7111 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7112 0, Address::invalid()};
7113 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7114 0, Address::invalid()};
7115 Address Base = Address::invalid();
7116 Address LB = Address::invalid();
7117 bool IsArraySection = false;
7118 bool HasCompleteRecord = false;
7119 };
7120
7121 /// A struct to store the attach pointer and pointee information, to be used
7122 /// when emitting an attach entry.
7123 struct AttachInfoTy {
7124 Address AttachPtrAddr = Address::invalid();
7125 Address AttachPteeAddr = Address::invalid();
7126 const ValueDecl *AttachPtrDecl = nullptr;
7127 const Expr *AttachMapExpr = nullptr;
7128
7129 bool isValid() const {
7130 return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
7131 }
7132 };
7133
7134 /// Check if there's any component list where the attach pointer expression
7135 /// matches the given captured variable.
7136 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7137 for (const auto &AttachEntry : AttachPtrExprMap) {
7138 if (AttachEntry.second) {
7139 // Check if the attach pointer expression is a DeclRefExpr that
7140 // references the captured variable
7141 if (const auto *DRE = dyn_cast<DeclRefExpr>(AttachEntry.second))
7142 if (DRE->getDecl() == VD)
7143 return true;
7144 }
7145 }
7146 return false;
7147 }
7148
7149 /// Get the previously-cached attach pointer for a component list, if any.
7150 const Expr *getAttachPtrExpr(
7151 OMPClauseMappableExprCommon::MappableExprComponentListRef Components)
7152 const {
7153 const auto It = AttachPtrExprMap.find(Components);
7154 if (It != AttachPtrExprMap.end())
7155 return It->second;
7156
7157 return nullptr;
7158 }
7159
7160private:
7161 /// Kind that defines how a device pointer has to be returned.
7162 struct MapInfo {
7163 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7164 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7165 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7166 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7167 bool ReturnDevicePointer = false;
7168 bool IsImplicit = false;
7169 const ValueDecl *Mapper = nullptr;
7170 const Expr *VarRef = nullptr;
7171 bool ForDeviceAddr = false;
7172
7173 MapInfo() = default;
7174 MapInfo(
7175 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7176 OpenMPMapClauseKind MapType,
7177 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7178 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7179 bool ReturnDevicePointer, bool IsImplicit,
7180 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7181 bool ForDeviceAddr = false)
7182 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7183 MotionModifiers(MotionModifiers),
7184 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7185 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7186 };
7187
7188 /// The target directive from where the mappable clauses were extracted. It
7189 /// is either an executable directive or a user-defined mapper directive.
7190 llvm::PointerUnion<const OMPExecutableDirective *,
7191 const OMPDeclareMapperDecl *>
7192 CurDir;
7193
7194 /// Function the directive is being generated for.
7195 CodeGenFunction &CGF;
7196
7197 /// Set of all first private variables in the current directive.
7198 /// bool data is set to true if the variable is implicitly marked as
7199 /// firstprivate, false otherwise.
7200 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7201
7202 /// Set of defaultmap clause kinds that use firstprivate behavior.
7203 llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;
7204
7205 /// Map between device pointer declarations and their expression components.
7206 /// The key value for declarations in 'this' is null.
7207 llvm::DenseMap<
7208 const ValueDecl *,
7209 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7210 DevPointersMap;
7211
7212 /// Map between device addr declarations and their expression components.
7213 /// The key value for declarations in 'this' is null.
7214 llvm::DenseMap<
7215 const ValueDecl *,
7216 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7217 HasDevAddrsMap;
7218
7219 /// Map between lambda declarations and their map type.
7220 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7221
7222 /// Map from component lists to their attach pointer expressions.
7223 llvm::DenseMap<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7224 const Expr *>
7225 AttachPtrExprMap;
7226
7227 /// Map from attach pointer expressions to their component depth.
7228 /// nullptr key has std::nullopt depth. This can be used to order attach-ptr
7229 /// expressions with increasing/decreasing depth.
7230 /// The component-depth of `nullptr` (i.e. no attach-ptr) is `std::nullopt`.
7231 /// TODO: Not urgent, but we should ideally use the number of pointer
7232 /// dereferences in an expr as an indicator of its complexity, instead of the
7233 /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
7234 /// `*(p + 5 + 5)` together.
7235 llvm::DenseMap<const Expr *, std::optional<size_t>>
7236 AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};
7237
7238 /// Map from attach pointer expressions to the order in which they were
7239 /// computed by collectAttachPtrExprInfo().
7240 llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
7241 {nullptr, 0}};
7242
7243 /// An instance of attach-ptr-expr comparator that can be used throughout the
7244 /// lifetime of this handler.
7245 AttachPtrExprComparator AttachPtrComparator;
7246
7247 llvm::Value *getExprTypeSize(const Expr *E) const {
7248 QualType ExprTy = E->getType().getCanonicalType();
7249
7250 // Calculate the size for array shaping expression.
7251 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7252 llvm::Value *Size =
7253 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7254 for (const Expr *SE : OAE->getDimensions()) {
7255 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7256 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7257 CGF.getContext().getSizeType(),
7258 SE->getExprLoc());
7259 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7260 }
7261 return Size;
7262 }
7263
7264 // Reference types are ignored for mapping purposes.
7265 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7266 ExprTy = RefTy->getPointeeType().getCanonicalType();
7267
7268 // Given that an array section is considered a built-in type, we need to
7269 // do the calculation based on the length of the section instead of relying
7270 // on CGF.getTypeSize(E->getType()).
7271 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
7272 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
7273 OAE->getBase()->IgnoreParenImpCasts())
7274 .getCanonicalType();
7275
7276 // If there is no length associated with the expression and the lower
7277 // bound is not specified either, that means we are using the whole length
7278 // of the base.
7279 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7280 !OAE->getLowerBound())
7281 return CGF.getTypeSize(BaseTy);
7282
7283 llvm::Value *ElemSize;
7284 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7285 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7286 } else {
7287 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7288 assert(ATy && "Expecting array type if not a pointer type.");
7289 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7290 }
7291
7292 // If we don't have a length at this point, that is because we have an
7293 // array section with a single element.
7294 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7295 return ElemSize;
7296
7297 if (const Expr *LenExpr = OAE->getLength()) {
7298 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7299 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7300 CGF.getContext().getSizeType(),
7301 LenExpr->getExprLoc());
7302 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7303 }
7304 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7305 OAE->getLowerBound() && "expected array_section[lb:].");
7306 // Size = sizetype - lb * elemtype;
7307 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7308 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7309 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7310 CGF.getContext().getSizeType(),
7311 OAE->getLowerBound()->getExprLoc());
7312 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7313 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7314 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7315 LengthVal = CGF.Builder.CreateSelect(
7316 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7317 return LengthVal;
7318 }
7319 return CGF.getTypeSize(ExprTy);
7320 }
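// Illustrative sketch (not part of the original source): for 'double *p' and
// map(p[lb:len]), the array-section branch above computes len * sizeof(double)
// at runtime; for an array 'double a[N]' and map(a[lb:]), it computes
// sizeof(a) - lb * sizeof(double), clamped to 0 by the final
// compare-and-select.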
7321
7322 /// Return the corresponding bits for a given map clause modifier. Add
7323 /// a flag marking the map as a pointer if requested. Add a flag marking the
7324 /// map as the first one of a series of maps that relate to the same map
7325 /// expression.
7326 OpenMPOffloadMappingFlags getMapTypeBits(
7327 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7328 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7329 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7330 OpenMPOffloadMappingFlags Bits =
7331 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7332 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7333 switch (MapType) {
7334 case OMPC_MAP_alloc:
7335 case OMPC_MAP_release:
7336 // alloc and release is the default behavior in the runtime library, i.e.
7337 // if we don't pass any bits alloc/release that is what the runtime is
7338 // going to do. Therefore, we don't need to signal anything for these two
7339 // type modifiers.
7340 break;
7341 case OMPC_MAP_to:
7342 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7343 break;
7344 case OMPC_MAP_from:
7345 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7346 break;
7347 case OMPC_MAP_tofrom:
7348 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7349 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7350 break;
7351 case OMPC_MAP_delete:
7352 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7353 break;
7354 case OMPC_MAP_unknown:
7355 llvm_unreachable("Unexpected map type!");
7356 }
7357 if (AddPtrFlag)
7358 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7359 if (AddIsTargetParamFlag)
7360 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7361 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7362 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7363 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7364 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7365 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7366 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7367 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7368 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7369 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7370 if (IsNonContiguous)
7371 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7372 return Bits;
7373 }
7374
7375 /// Return true if the provided expression is a final array section. A
7376 /// final array section, is one whose length can't be proved to be one.
7377 bool isFinalArraySectionExpression(const Expr *E) const {
7378 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7379
7380 // It is not an array section and therefore not a unity-size one.
7381 if (!OASE)
7382 return false;
7383
7385 // An array section with no colon always refers to a single element.
7385 if (OASE->getColonLocFirst().isInvalid())
7386 return false;
7387
7388 const Expr *Length = OASE->getLength();
7389
7390 // If we don't have a length we have to check if the array has size 1
7391 // for this dimension. Also, we should always expect a length if the
7392 // base type is pointer.
7393 if (!Length) {
7394 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7395 OASE->getBase()->IgnoreParenImpCasts())
7396 .getCanonicalType();
7397 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7398 return ATy->getSExtSize() != 1;
7399 // If we don't have a constant dimension length, we have to consider
7400 // the current section as having any size, so it is not necessarily
7401 // unitary. If it happens to be unity size, that's the user's fault.
7402 return true;
7403 }
7404
7405 // Check if the length evaluates to 1.
7406 Expr::EvalResult Result;
7407 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7408 return true; // Can have more than size 1.
7409
7410 llvm::APSInt ConstLength = Result.Val.getInt();
7411 return ConstLength.getSExtValue() != 1;
7412 }
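// Illustrative sketch (not part of the original source): given 'int a[10]',
// a[3:1] has constant length 1 and is not "final", while a[0:4] and a[0:n]
// (length not provably 1) are; a section with no colon always denotes a
// single element and is likewise not final.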
7413
7414 /// Emit an attach entry into \p CombinedInfo, using the information from \p
7415 /// AttachInfo. For example, for a map of form `int *p; ... map(p[1:10])`,
7416 /// an attach entry has the following form:
7417 /// &p, &p[1], sizeof(void*), ATTACH
7418 void emitAttachEntry(CodeGenFunction &CGF, MapCombinedInfoTy &CombinedInfo,
7419 const AttachInfoTy &AttachInfo) const {
7420 assert(AttachInfo.isValid() &&
7421 "Expected valid attach pointer/pointee information!");
7422
7423 // Size is the size of the pointer itself; use the pointer size, not the
7424 // BaseDecl size.
7425 llvm::Value *PointerSize = CGF.Builder.CreateIntCast(
7426 llvm::ConstantInt::get(
7427 CGF.CGM.SizeTy, CGF.getContext()
7428 .getTypeSizeInChars(CGF.getContext().VoidPtrTy)
7429 .getQuantity()),
7430 CGF.Int64Ty, /*isSigned=*/true);
7431
7432 CombinedInfo.Exprs.emplace_back(AttachInfo.AttachPtrDecl,
7433 AttachInfo.AttachMapExpr);
7434 CombinedInfo.BasePointers.push_back(
7435 AttachInfo.AttachPtrAddr.emitRawPointer(CGF));
7436 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7437 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7438 CombinedInfo.Pointers.push_back(
7439 AttachInfo.AttachPteeAddr.emitRawPointer(CGF));
7440 CombinedInfo.Sizes.push_back(PointerSize);
7441 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_ATTACH);
7442 CombinedInfo.Mappers.push_back(nullptr);
7443 CombinedInfo.NonContigInfo.Dims.push_back(1);
7444 }
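// A simplified sketch of the intended runtime effect (our reading of the
// ATTACH flag): for
//   int *p; ... map(p[1:10])
// the entry (&p, &p[1], sizeof(void*), ATTACH) asks the runtime to store
// the translated device address of p[1] into the device copy of p, once
// both the pointer and the pointee are present on the device.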
7445
7446 /// A helper class to copy structures with overlapped elements, i.e. those
7447 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7448 /// are not explicitly copied have mapping nodes synthesized for them,
7449 /// taking care to avoid generating zero-sized copies.
7450 class CopyOverlappedEntryGaps {
7451 CodeGenFunction &CGF;
7452 MapCombinedInfoTy &CombinedInfo;
7453 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7454 const ValueDecl *MapDecl = nullptr;
7455 const Expr *MapExpr = nullptr;
7456 Address BP = Address::invalid();
7457 bool IsNonContiguous = false;
7458 uint64_t DimSize = 0;
7459 // These elements track the position as the struct is iterated over
7460 // (in order of increasing element address).
7461 const RecordDecl *LastParent = nullptr;
7462 uint64_t Cursor = 0;
7463 unsigned LastIndex = -1u;
7464 Address LB = Address::invalid();
7465
7466 public:
7467 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7468 MapCombinedInfoTy &CombinedInfo,
7469 OpenMPOffloadMappingFlags Flags,
7470 const ValueDecl *MapDecl, const Expr *MapExpr,
7471 Address BP, Address LB, bool IsNonContiguous,
7472 uint64_t DimSize)
7473 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7474 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7475 DimSize(DimSize), LB(LB) {}
7476
7477 void processField(
7478 const OMPClauseMappableExprCommon::MappableComponent &MC,
7479 const FieldDecl *FD,
7480 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7481 EmitMemberExprBase) {
7482 const RecordDecl *RD = FD->getParent();
7483 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7484 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7485 uint64_t FieldSize =
7486 CGF.getContext().getTypeSize(FD->getType());
7487 Address ComponentLB = Address::invalid();
7488
7489 if (FD->getType()->isLValueReferenceType()) {
7490 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7491 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7492 ComponentLB =
7493 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7494 } else {
7495 ComponentLB =
7496 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
7497 }
7498
7499 if (!LastParent)
7500 LastParent = RD;
7501 if (FD->getParent() == LastParent) {
7502 if (FD->getFieldIndex() != LastIndex + 1)
7503 copyUntilField(FD, ComponentLB);
7504 } else {
7505 LastParent = FD->getParent();
7506 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7507 copyUntilField(FD, ComponentLB);
7508 }
7509 Cursor = FieldOffset + FieldSize;
7510 LastIndex = FD->getFieldIndex();
7511 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7512 }
7513
7514 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7515 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7516 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7517 llvm::Value *Size =
7518 CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
7519 copySizedChunk(LBPtr, Size);
7520 }
7521
7522 void copyUntilEnd(Address HB) {
7523 if (LastParent) {
7524 const ASTRecordLayout &RL =
7525 CGF.getContext().getASTRecordLayout(LastParent);
7526 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7527 return;
7528 }
7529 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7530 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7531 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7532 LBPtr);
7533 copySizedChunk(LBPtr, Size);
7534 }
7535
7536 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7537 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7538 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7539 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7540 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7541 CombinedInfo.Pointers.push_back(Base);
7542 CombinedInfo.Sizes.push_back(
7543 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7544 CombinedInfo.Types.push_back(Flags);
7545 CombinedInfo.Mappers.push_back(nullptr);
7546 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7547 }
7548 };
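// A hedged example of how this helper is used: given
//   struct T { int a; int b; int c; } t;
//   #pragma omp target map(tofrom: t) map(from: t.b)
// t.b overlaps the map of t, so the gaps around it are synthesized as
// entries roughly of the form
//   &t, &t.a, sizeof(int), MEMBER_OF(1) | TO | FROM  // copyUntilField
//   &t, &t.c, sizeof(int), MEMBER_OF(1) | TO | FROM  // copyUntilEnd
// while t.b keeps its own entry from its map clause.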
7549
7550 /// Generate the base pointers, section pointers, sizes, map type bits, and
7551 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7552 /// map type, map or motion modifiers, and expression components.
7553 /// \a IsFirstComponent should be set to true if the provided set of
7554 /// components is the first associated with a capture.
7555 void generateInfoForComponentList(
7556 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7557 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7558 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7559 MapCombinedInfoTy &CombinedInfo,
7560 MapCombinedInfoTy &StructBaseCombinedInfo,
7561 StructRangeInfoTy &PartialStruct, AttachInfoTy &AttachInfo,
7562 bool IsFirstComponentList, bool IsImplicit,
7563 bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr,
7564 bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr,
7565 const Expr *MapExpr = nullptr,
7566 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7567 OverlappedElements = {}) const {
7568
7569 // The following summarizes what has to be generated for each map and the
7570 // types below. The generated information is expressed in this order:
7571 // base pointer, section pointer, size, flags
7572 // (to add to the ones that come from the map type and modifier).
7573 // Entries annotated with (+) are only generated for "target" constructs,
7574 // and only if the variable at the beginning of the expression is used in
7575 // the region.
7576 //
7577 // double d;
7578 // int i[100];
7579 // float *p;
7580 // int **a = &i;
7581 //
7582 // struct S1 {
7583 // int i;
7584 // float f[50];
7585 // }
7586 // struct S2 {
7587 // int i;
7588 // float f[50];
7589 // S1 s;
7590 // double *p;
7591 // double *&pref;
7592 // struct S2 *ps;
7593 // int &ref;
7594 // }
7595 // S2 s;
7596 // S2 *ps;
7597 //
7598 // map(d)
7599 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7600 //
7601 // map(i)
7602 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7603 //
7604 // map(i[1:23])
7605 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7606 //
7607 // map(p)
7608 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7609 //
7610 // map(p[1:24])
7611 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // map pointee
7612 // &p, &p[1], sizeof(void*), ATTACH // attach pointer/pointee, if both
7613 // // are present, and either is new
7614 //
7615 // map(([22])p)
7616 // p, p, 22*sizeof(float), TARGET_PARAM | TO | FROM
7617 // &p, p, sizeof(void*), ATTACH
7618 //
7619 // map((*a)[0:3])
7620 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7621 // (*a)[0], &(*a)[0], 3 * sizeof(int), TO | FROM
7622 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7623 // (+) Only on target, if a is used in the region
7624 // Note: Since the attach base-pointer is `*a`, which is not a scalar
7625 // variable, it doesn't determine the clause on `a`. `a` is mapped using
7626 // a zero-length-array-section map by generateDefaultMapInfo, if it is
7627 // referenced in the target region, because it is a pointer.
7628 //
7629 // map(**a)
7630 // a, a, 0, TARGET_PARAM | IMPLICIT // (+)
7631 // &(*a)[0], &(*a)[0], sizeof(int), TO | FROM
7632 // &(*a), &(*a)[0], sizeof(void*), ATTACH
7633 // (+) Only on target, if a is used in the region
7634 //
7635 // map(s)
7636 // FIXME: This needs to also imply map(ref_ptr_ptee: s.ref), since the
7637 // effect is supposed to be same as if the user had a map for every element
7638 // of the struct. We currently do a shallow-map of s.
7639 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7640 //
7641 // map(s.i)
7642 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7643 //
7644 // map(s.s.f)
7645 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7646 //
7647 // map(s.p)
7648 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7649 //
7650 // map(to: s.p[:22])
7651 // &s, &(s.p), sizeof(double*), TARGET_PARAM | IMPLICIT // (+)
7652 // &(s.p[0]), &(s.p[0]), 22 * sizeof(double*), TO | FROM
7653 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7654 //
7655 // map(to: s.ref)
7656 // &s, &(ptr(s.ref)), sizeof(int*), TARGET_PARAM (*)
7657 // &s, &(ptee(s.ref)), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7658 // (*) allocate space for struct members; only this entry is a target parameter.
7659 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7660 // optimizes this entry out, same in the examples below)
7661 // (***) map the pointee (map: to)
7662 // Note: ptr(s.ref) represents the referring pointer of s.ref
7663 // ptee(s.ref) represents the referenced pointee of s.ref
7664 //
7665 // map(to: s.pref)
7666 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM
7667 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7668 //
7669 // map(to: s.pref[:22])
7670 // &s, &(ptr(s.pref)), sizeof(double**), TARGET_PARAM | IMPLICIT // (+)
7671 // &s, &(ptee(s.pref)), sizeof(double*), MEMBER_OF(1) | PTR_AND_OBJ | TO |
7672 // FROM | IMPLICIT // (+)
7673 // &(ptee(s.pref)[0]), &(ptee(s.pref)[0]), 22 * sizeof(double), TO
7674 // &(ptee(s.pref)), &(ptee(s.pref)[0]), sizeof(void*), ATTACH
7675 //
7676 // map(s.ps)
7677 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7678 //
7679 // map(from: s.ps->s.i)
7680 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7681 // &(s.ps[0]), &(s.ps->s.i), sizeof(int), FROM
7682 // &(s.ps), &(s.ps->s.i), sizeof(void*), ATTACH
7683 //
7684 // map(to: s.ps->ps)
7685 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7686 // &(s.ps[0]), &(s.ps->ps), sizeof(S2*), TO
7687 // &(s.ps), &(s.ps->ps), sizeof(void*), ATTACH
7688 //
7689 // map(s.ps->ps->ps)
7690 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7691 // &(s.ps->ps[0]), &(s.ps->ps->ps), sizeof(S2*), TO
7692 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(void*), ATTACH
7693 //
7694 // map(to: s.ps->ps->s.f[:22])
7695 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM | IMPLICIT // (+)
7696 // &(s.ps->ps[0]), &(s.ps->ps->s.f[0]), 22*sizeof(float), TO
7697 // &(s.ps->ps), &(s.ps->ps->s.f[0]), sizeof(void*), ATTACH
7698 //
7699 // map(ps)
7700 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7701 //
7702 // map(ps->i)
7703 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7704 // &ps, &(ps->i), sizeof(void*), ATTACH
7705 //
7706 // map(ps->s.f)
7707 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7708 // &ps, &(ps->s.f[0]), sizeof(ps), ATTACH
7709 //
7710 // map(from: ps->p)
7711 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7712 // &ps, &(ps->p), sizeof(ps), ATTACH
7713 //
7714 // map(to: ps->p[:22])
7715 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7716 // &(ps->p[0]), &(ps->p[0]), 22*sizeof(double), TO
7717 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7718 //
7719 // map(ps->ps)
7720 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7721 // &ps, &(ps->ps), sizeof(ps), ATTACH
7722 //
7723 // map(from: ps->ps->s.i)
7724 // ps, &(ps[0]), 0, TARGET_PARAM | IMPLICIT // (+)
7725 // &(ps->ps[0]), &(ps->ps->s.i), sizeof(int), FROM
7726 // &(ps->ps), &(ps->ps->s.i), sizeof(void*), ATTACH
7727 //
7728 // map(from: ps->ps->ps)
7729 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7730 // &(ps->ps[0]), &(ps->ps->ps), sizeof(S2*), FROM
7731 // &(ps->ps), &(ps->ps->ps), sizeof(void*), ATTACH
7732 //
7733 // map(ps->ps->ps->ps)
7734 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7735 // &(ps->ps->ps[0]), &(ps->ps->ps->ps), sizeof(S2*), FROM
7736 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(void*), ATTACH
7737 //
7738 // map(to: ps->ps->ps->s.f[:22])
7739 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7740 // &(ps->ps->ps[0]), &(ps->ps->ps->s.f[0]), 22*sizeof(float), TO
7741 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), sizeof(void*), ATTACH
7742 //
7743 // map(to: s.f[:22]) map(from: s.p[:33])
7744 // On target, and if s is used in the region:
7745 //
7746 // &s, &(s.f[0]), 50*sizeof(float) +
7747 // sizeof(struct S1) +
7748 // sizeof(double*) (**), TARGET_PARAM
7749 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7750 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) | TO |
7751 // FROM | IMPLICIT
7752 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7753 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7754 // (**) allocate contiguous space needed to fit all mapped members even if
7755 // we allocate space for members not mapped (in this example,
7756 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7757 // them as well because they fall between &s.f[0] and &s.p)
7758 //
7759 // On other constructs, and on target if s is not used in the region:
7760 // &s, &(s.f[0]), 22*sizeof(float), TO
7761 // &(s.p[0]), &(s.p[0]), 33*sizeof(double), FROM
7762 // &(s.p), &(s.p[0]), sizeof(void*), ATTACH
7763 //
7764 // map(from: s.f[:22]) map(to: ps->p[:33])
7765 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7766 // &ps[0], &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7767 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7768 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7769 //
7770 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7771 // &s, &(s.f[0]), 50*sizeof(float) +
7772 // sizeof(struct S1), TARGET_PARAM
7773 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7774 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7775 // ps, &ps[0], 0, TARGET_PARAM | IMPLICIT // (+)
7776 // &(ps->p[0]), &(ps->p[0]), 33*sizeof(double), TO
7777 // &(ps->p), &(ps->p[0]), sizeof(void*), ATTACH
7778 //
7779 // map(p[:100], p)
7780 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7781 // p, &p[0], 100*sizeof(float), TO | FROM
7782 // &p, &p[0], sizeof(float*), ATTACH
7783
7784 // Track if the map information being generated is the first for a capture.
7785 bool IsCaptureFirstInfo = IsFirstComponentList;
7786 // When the variable is on a declare target link or in a to clause with
7787 // unified memory, a reference is needed to hold the host/device address
7788 // of the variable.
7789 bool RequiresReference = false;
7790
7791 // Scan the components from the base to the complete expression.
7792 auto CI = Components.rbegin();
7793 auto CE = Components.rend();
7794 auto I = CI;
7795
7796 // Track if the map information being generated is the first for a list of
7797 // components.
7798 bool IsExpressionFirstInfo = true;
7799 bool FirstPointerInComplexData = false;
7800 Address BP = Address::invalid();
7801 Address FinalLowestElem = Address::invalid();
7802 const Expr *AssocExpr = I->getAssociatedExpression();
7803 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7804 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7805 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7806
7807 // Get the pointer-attachment base-pointer for the given list, if any.
7808 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
7809 auto [AttachPtrAddr, AttachPteeBaseAddr] =
7810 getAttachPtrAddrAndPteeBaseAddr(AttachPtrExpr, CGF);
7811
7812 bool HasAttachPtr = AttachPtrExpr != nullptr;
7813 bool FirstComponentIsForAttachPtr = AssocExpr == AttachPtrExpr;
7814 bool SeenAttachPtr = FirstComponentIsForAttachPtr;
7815
7816 if (FirstComponentIsForAttachPtr) {
7817 // No need to process AttachPtr here. It will be processed at the end
7818 // after we have computed the pointee's address.
7819 ++I;
7820 } else if (isa<MemberExpr>(AssocExpr)) {
7821 // The base is the 'this' pointer. The content of the pointer is going
7822 // to be the base of the field being mapped.
7823 BP = CGF.LoadCXXThisAddress();
7824 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7825 (OASE &&
7826 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7827 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7828 } else if (OAShE &&
7829 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7830 BP = Address(
7831 CGF.EmitScalarExpr(OAShE->getBase()),
7832 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7833 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7834 } else {
7835 // The base is the reference to the variable.
7836 // BP = &Var.
7837 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7838 if (const auto *VD =
7839 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7840 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7841 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7842 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7843 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7844 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7845 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7846 RequiresReference = true;
7847 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7848 }
7849 }
7850 }
7851
7852 // If the variable is a pointer and is being dereferenced (i.e. is not
7853 // the last component), the base has to be the pointer itself, not its
7854 // reference. References are ignored for mapping purposes.
7855 QualType Ty =
7856 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7857 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7858 // No need to generate individual map information for the pointer; it
7859 // can be associated with the combined storage if shared memory mode is
7860 // active or the base declaration is not a global variable.
7861 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7862 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7863 !VD || VD->hasLocalStorage() || HasAttachPtr)
7864 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7865 else
7866 FirstPointerInComplexData = true;
7867 ++I;
7868 }
7869 }
7870
7871 // Track whether a component of the list should be marked as MEMBER_OF some
7872 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7873 // in a component list should be marked as MEMBER_OF; all subsequent entries
7874 // do not belong to the base struct. E.g.
7875 // struct S2 s;
7876 // s.ps->ps->ps->f[:]
7877 // (1) (2) (3) (4)
7878 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7879 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7880 // is the pointee of ps(2), which is not a member of struct s, so it should not
7881 // be marked as such (it is still PTR_AND_OBJ).
7882 // The variable is initialized to false so that PTR_AND_OBJ entries which
7883 // are not struct members are not considered (e.g. array of pointers to
7884 // data).
7885 bool ShouldBeMemberOf = false;
7886
7887 // Variable keeping track of whether or not we have encountered a component
7888 // in the component list which is a member expression. Useful when we have a
7889 // pointer or a final array section, in which case it is the previous
7890 // component in the list which tells us whether we have a member expression.
7891 // E.g. X.f[:]
7892 // While processing the final array section "[:]" it is "f" which tells us
7893 // whether we are dealing with a member of a declared struct.
7894 const MemberExpr *EncounteredME = nullptr;
7895
7896 // Track the total number of dimensions. Start from one for the dummy
7897 // dimension.
7898 uint64_t DimSize = 1;
7899
7900 // Detects non-contiguous updates due to strided accesses.
7901 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
7902 // correctly when generating information to be passed to the runtime. The
7903 // flag is set to true if any array section has a stride not equal to 1, or
7904 // if the stride is not a constant expression (conservatively assumed
7905 // non-contiguous).
7906 bool IsNonContiguous =
7907 CombinedInfo.NonContigInfo.IsNonContiguous ||
7908 any_of(Components, [&](const auto &Component) {
7909 const auto *OASE =
7910 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
7911 if (!OASE)
7912 return false;
7913
7914 const Expr *StrideExpr = OASE->getStride();
7915 if (!StrideExpr)
7916 return false;
7917
7918 const auto Constant =
7919 StrideExpr->getIntegerConstantExpr(CGF.getContext());
7920 if (!Constant)
7921 return false;
7922
7923 return !Constant->isOne();
7924 });
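// E.g. (illustrative), for `int arr[8];`:
//   #pragma omp target update to(arr[0:4:1])  // stride 1 -> contiguous
//   #pragma omp target update to(arr[0:4:2])  // stride 2 -> non-contiguous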
7925
7926 bool IsPrevMemberReference = false;
7927
7928 bool IsPartialMapped =
7929 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7930
7931 // We need to check if we will encounter any member expressions (MEs). If
7932 // we do not encounter any, it means we will be mapping the whole struct.
7933 // In that case we need to skip adding an entry for the struct to the
7934 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7935 // list only when generating all info for clauses.
7936 bool IsMappingWholeStruct = true;
7937 if (!GenerateAllInfoForClauses) {
7938 IsMappingWholeStruct = false;
7939 } else {
7940 for (auto TempI = I; TempI != CE; ++TempI) {
7941 const MemberExpr *PossibleME =
7942 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7943 if (PossibleME) {
7944 IsMappingWholeStruct = false;
7945 break;
7946 }
7947 }
7948 }
7949
7950 bool SeenFirstNonBinOpExprAfterAttachPtr = false;
7951 for (; I != CE; ++I) {
7952 // If we have a valid attach-ptr, we skip processing all components until
7953 // after the attach-ptr.
7954 if (HasAttachPtr && !SeenAttachPtr) {
7955 SeenAttachPtr = I->getAssociatedExpression() == AttachPtrExpr;
7956 continue;
7957 }
7958
7959 // After finding the attach pointer, skip binary operators, to get past
7960 // expressions like (p + 10), for a map like map(*(p + 10)), where p is
7961 // the attach-ptr.
7962 if (HasAttachPtr && !SeenFirstNonBinOpExprAfterAttachPtr) {
7963 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7964 if (BO)
7965 continue;
7966
7967 // Found the first non-binary-operator component after the attach-ptr.
7968 SeenFirstNonBinOpExprAfterAttachPtr = true;
7969 BP = AttachPteeBaseAddr;
7970 }
7971
7972 // If the current component is a member of a struct (parent struct), mark it.
7973 if (!EncounteredME) {
7974 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7975 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7976 // as MEMBER_OF the parent struct.
7977 if (EncounteredME) {
7978 ShouldBeMemberOf = true;
7979 // Do not emit as a complex pointer if this is actually not an
7980 // array-like expression.
7981 if (FirstPointerInComplexData) {
7982 QualType Ty = std::prev(I)
7983 ->getAssociatedDeclaration()
7984 ->getType()
7985 .getNonReferenceType();
7986 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7987 FirstPointerInComplexData = false;
7988 }
7989 }
7990 }
7991
7992 auto Next = std::next(I);
7993
7994 // We need to generate the addresses and sizes if this is the last
7995 // component, if the component is a pointer or if it is an array section
7996 // whose length can't be proved to be one. If this is a pointer, it
7997 // becomes the base address for the following components.
7998
7999 // A final array section is one whose length can't be proved to be one.
8000 // If the map item is non-contiguous then we don't treat any array section
8001 // as final array section.
8002 bool IsFinalArraySection =
8003 !IsNonContiguous &&
8004 isFinalArraySectionExpression(I->getAssociatedExpression());
8005
8006 // If we have a declaration for the mapping use that, otherwise use
8007 // the base declaration of the map clause.
8008 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
8009 ? I->getAssociatedDeclaration()
8010 : BaseDecl;
8011 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
8012 : MapExpr;
8013
8014 // Get information on whether the element is a pointer. We have to do
8015 // special treatment for array sections given that they are built-in
8016 // types.
8017 const auto *OASE =
8018 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
8019 const auto *OAShE =
8020 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8021 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8022 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8023 bool IsPointer =
8024 OAShE ||
8025 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
8026 .getCanonicalType()
8027 ->isAnyPointerType()) ||
8028 I->getAssociatedExpression()->getType()->isAnyPointerType();
8029 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8030 MapDecl &&
8031 MapDecl->getType()->isLValueReferenceType();
8032 bool IsNonDerefPointer = IsPointer &&
8033 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
8034 !IsNonContiguous;
8035
8036 if (OASE)
8037 ++DimSize;
8038
8039 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8040 IsFinalArraySection) {
8041 // If this is not the last component, we expect the pointer to be
8042 // associated with an array expression or member expression.
8043 assert((Next == CE ||
8044 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8045 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8046 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
8047 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8048 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8049 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8050 "Unexpected expression");
8051
8052 Address LB = Address::invalid();
8053 Address LowestElem = Address::invalid();
8054 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8055 const MemberExpr *E) {
8056 const Expr *BaseExpr = E->getBase();
8057 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8058 // scalar.
8059 LValue BaseLV;
8060 if (E->isArrow()) {
8061 LValueBaseInfo BaseInfo;
8062 TBAAAccessInfo TBAAInfo;
8063 Address Addr =
8064 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8065 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8066 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8067 } else {
8068 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8069 }
8070 return BaseLV;
8071 };
8072 if (OAShE) {
8073 LowestElem = LB =
8074 Address(CGF.EmitScalarExpr(OAShE->getBase()),
8075 CGF.ConvertTypeForMem(
8076 OAShE->getBase()->getType()->getPointeeType()),
8077 CGF.getContext().getTypeAlignInChars(
8078 OAShE->getBase()->getType()));
8079 } else if (IsMemberReference) {
8080 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8081 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8082 LowestElem = CGF.EmitLValueForFieldInitialization(
8083 BaseLVal, cast<FieldDecl>(MapDecl))
8084 .getAddress();
8085 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8086 .getAddress();
8087 } else {
8088 LowestElem = LB =
8089 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8090 .getAddress();
8091 }
8092
8093 // Save the final LowestElem, to use it as the pointee in attach maps,
8094 // if emitted.
8095 if (Next == CE)
8096 FinalLowestElem = LowestElem;
8097
8098 // If this component is a pointer inside the base struct then we don't
8099 // need to create any entry for it - it will be combined with the object
8100 // it is pointing to into a single PTR_AND_OBJ entry.
8101 bool IsMemberPointerOrAddr =
8102 EncounteredME &&
8103 (((IsPointer || ForDeviceAddr) &&
8104 I->getAssociatedExpression() == EncounteredME) ||
8105 (IsPrevMemberReference && !IsPointer) ||
8106 (IsMemberReference && Next != CE &&
8107 !Next->getAssociatedExpression()->getType()->isPointerType()));
8108 if (!OverlappedElements.empty() && Next == CE) {
8109 // Handle base element with the info for overlapped elements.
8110 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8111 assert(!IsPointer &&
8112 "Unexpected base element with the pointer type.");
8113 // Mark the whole struct as the struct that requires allocation on the
8114 // device.
8115 PartialStruct.LowestElem = {0, LowestElem};
8116 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8117 I->getAssociatedExpression()->getType());
8118 Address HB = CGF.Builder.CreateConstGEP(
8119 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8120 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8121 TypeSize.getQuantity() - 1);
8122 PartialStruct.HighestElem = {
8123 std::numeric_limits<decltype(
8124 PartialStruct.HighestElem.first)>::max(),
8125 HB};
8126 PartialStruct.Base = BP;
8127 PartialStruct.LB = LB;
8128 assert(
8129 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8130 "Overlapped elements must be used only once for the variable.");
8131 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8132 // Emit data for non-overlapped data.
8133 OpenMPOffloadMappingFlags Flags =
8134 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8135 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8136 /*AddPtrFlag=*/false,
8137 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8138 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8139 MapExpr, BP, LB, IsNonContiguous,
8140 DimSize);
8141 // Do bitcopy of all non-overlapped structure elements.
8142 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8143 Component : OverlappedElements) {
8144 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8145 Component) {
8146 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8147 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
8148 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8149 }
8150 }
8151 }
8152 }
8153 CopyGaps.copyUntilEnd(HB);
8154 break;
8155 }
8156 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8157 // Skip adding an entry in the CurInfo of this combined entry if the
8158 // whole struct is currently being mapped. The struct needs to be added
8159 // in the first position before any data internal to the struct is being
8160 // mapped.
8161 // Skip adding an entry in the CurInfo of this combined entry if the
8162 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8163 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8164 (Next == CE && MapType != OMPC_MAP_unknown)) {
8165 if (!IsMappingWholeStruct) {
8166 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8167 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
8168 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8169 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8170 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8171 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8172 Size, CGF.Int64Ty, /*isSigned=*/true));
8173 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8174 : 1);
8175 } else {
8176 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8177 StructBaseCombinedInfo.BasePointers.push_back(
8178 BP.emitRawPointer(CGF));
8179 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
8180 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8181 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8182 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8183 Size, CGF.Int64Ty, /*isSigned=*/true));
8184 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8185 IsNonContiguous ? DimSize : 1);
8186 }
8187
8188 // If Mapper is valid, the last component inherits the mapper.
8189 bool HasMapper = Mapper && Next == CE;
8190 if (!IsMappingWholeStruct)
8191 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8192 else
8193 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
8194 : nullptr);
8195
8196 // We need to add a pointer flag for each map that comes from the
8197 // same expression except for the first one. We also need to signal
8198 // this map is the first one that relates to the current capture
8199 // (there is a set of entries for each capture).
8200 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8201 MapType, MapModifiers, MotionModifiers, IsImplicit,
8202 !IsExpressionFirstInfo || RequiresReference ||
8203 FirstPointerInComplexData || IsMemberReference,
8204 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8205
8206 if (!IsExpressionFirstInfo || IsMemberReference) {
8207 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8208 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8209 if (IsPointer || (IsMemberReference && Next != CE))
8210 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8211 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8212 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8213 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8214 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8215
8216 if (ShouldBeMemberOf) {
8217 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8218 // should be later updated with the correct value of MEMBER_OF.
8219 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8220 // From now on, all subsequent PTR_AND_OBJ entries should not be
8221 // marked as MEMBER_OF.
8222 ShouldBeMemberOf = false;
8223 }
8224 }
8225
8226 if (!IsMappingWholeStruct)
8227 CombinedInfo.Types.push_back(Flags);
8228 else
8229 StructBaseCombinedInfo.Types.push_back(Flags);
8230 }
8231
8232 // If we have encountered a member expression so far, keep track of the
8233 // mapped member. If the parent is "*this", then the value declaration
8234 // is nullptr.
8235 if (EncounteredME) {
8236 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8237 unsigned FieldIndex = FD->getFieldIndex();
8238
8239 // Update info about the lowest and highest elements for this struct
8240 if (!PartialStruct.Base.isValid()) {
8241 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8242 if (IsFinalArraySection && OASE) {
8243 Address HB =
8244 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8245 .getAddress();
8246 PartialStruct.HighestElem = {FieldIndex, HB};
8247 } else {
8248 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8249 }
8250 PartialStruct.Base = BP;
8251 PartialStruct.LB = BP;
8252 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8253 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8254 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8255 if (IsFinalArraySection && OASE) {
8256 Address HB =
8257 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8258 .getAddress();
8259 PartialStruct.HighestElem = {FieldIndex, HB};
8260 } else {
8261 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8262 }
8263 }
8264 }
8265
8266 // Need to emit combined struct for array sections.
8267 if (IsFinalArraySection || IsNonContiguous)
8268 PartialStruct.IsArraySection = true;
8269
8270 // If we have a final array section, we are done with this expression.
8271 if (IsFinalArraySection)
8272 break;
8273
8274 // The pointer becomes the base for the next element.
8275 if (Next != CE)
8276 BP = IsMemberReference ? LowestElem : LB;
8277 if (!IsPartialMapped)
8278 IsExpressionFirstInfo = false;
8279 IsCaptureFirstInfo = false;
8280 FirstPointerInComplexData = false;
8281 IsPrevMemberReference = IsMemberReference;
8282 } else if (FirstPointerInComplexData) {
8283 QualType Ty = Components.rbegin()
8284 ->getAssociatedDeclaration()
8285 ->getType()
8286 .getNonReferenceType();
8287 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8288 FirstPointerInComplexData = false;
8289 }
8290 }
8291 // If we ran into the whole component, allocate space for the whole
8292 // record.
8293 if (!EncounteredME)
8294 PartialStruct.HasCompleteRecord = true;
8295
8296 // Populate ATTACH information for later processing by emitAttachEntry.
8297 if (shouldEmitAttachEntry(AttachPtrExpr, BaseDecl, CGF, CurDir)) {
8298 AttachInfo.AttachPtrAddr = AttachPtrAddr;
8299 AttachInfo.AttachPteeAddr = FinalLowestElem;
8300 AttachInfo.AttachPtrDecl = BaseDecl;
8301 AttachInfo.AttachMapExpr = MapExpr;
8302 }
8303
8304 if (!IsNonContiguous)
8305 return;
8306
8307 const ASTContext &Context = CGF.getContext();
8308
8309 // To support strides in array sections, we need to initialize the first
8310 // dimension size as 1, the first offset as 0, and the first count as 1.
8311 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8312 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8313 MapValuesArrayTy CurStrides;
8314 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8315 uint64_t ElementTypeSize;
8316
8317 // Collect size information for each dimension and get the element size as
8318 // the first stride. For example, for `int arr[10][10]`, the DimSizes
8319 // should be [10, 10] and the first stride is 4 bytes.
8320 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8321 Components) {
8322 const Expr *AssocExpr = Component.getAssociatedExpression();
8323 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8324
8325 if (!OASE)
8326 continue;
8327
8328 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
8329 auto *CAT = Context.getAsConstantArrayType(Ty);
8330 auto *VAT = Context.getAsVariableArrayType(Ty);
8331
8332 // We need all the dimension sizes except for the last dimension.
8333 assert((VAT || CAT || &Component == &*Components.begin()) &&
8334 "Should be either ConstantArray or VariableArray if not the "
8335 "first Component");
8336
8337 // Get element size if CurStrides is empty.
8338 if (CurStrides.empty()) {
8339 const Type *ElementType = nullptr;
8340 if (CAT)
8341 ElementType = CAT->getElementType().getTypePtr();
8342 else if (VAT)
8343 ElementType = VAT->getElementType().getTypePtr();
8344 else
8345 assert(&Component == &*Components.begin() &&
8346 "Only expect pointer (non CAT or VAT) when this is the "
8347 "first Component");
8348 // If ElementType is null, then it means the base is a pointer
8349 // (neither CAT nor VAT) and we'll attempt to get ElementType again
8350 // for the next iteration.
8351 if (ElementType) {
8352 // For the case that having pointer as base, we need to remove one
8353 // level of indirection.
8354 if (&Component != &*Components.begin())
8355 ElementType = ElementType->getPointeeOrArrayElementType();
8356 ElementTypeSize =
8357 Context.getTypeSizeInChars(ElementType).getQuantity();
8358 CurStrides.push_back(
8359 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8360 }
8361 }
8362 // Get each dimension's size, except for the last dimension, since we
8363 // don't need it.
8364 if (DimSizes.size() < Components.size() - 1) {
8365 if (CAT)
8366 DimSizes.push_back(
8367 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
8368 else if (VAT)
8369 DimSizes.push_back(CGF.Builder.CreateIntCast(
8370 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8371 /*IsSigned=*/false));
8372 }
8373 }
8374
8375 // Skip the dummy dimension since we already have its information.
8376 auto *DI = DimSizes.begin() + 1;
8377 // Running product of the dimension sizes.
8378 llvm::Value *DimProd =
8379 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8380
8381 // Collect info for non-contiguous maps. Notice that offset, count, and
8382 // stride are only meaningful for array sections, so we insert a null for
8383 // anything other than an array section.
8384 // Also, the sizes of the offset, count, and stride lists are not the same
8385 // as those of pointers, base_pointers, sizes, or dims. Instead, they match
8386 // the number of non-contiguous declarations in the target update to/from
8387 // clause.
8388 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8389 Components) {
8390 const Expr *AssocExpr = Component.getAssociatedExpression();
8391
8392 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8393 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8394 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8395 /*isSigned=*/false);
8396 CurOffsets.push_back(Offset);
8397 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8398 CurStrides.push_back(CurStrides.back());
8399 continue;
8400 }
8401
8402 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8403
8404 if (!OASE)
8405 continue;
8406
8407 // Offset
8408 const Expr *OffsetExpr = OASE->getLowerBound();
8409 llvm::Value *Offset = nullptr;
8410 if (!OffsetExpr) {
8411 // If offset is absent, then we just set it to zero.
8412 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8413 } else {
8414 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8415 CGF.Int64Ty,
8416 /*isSigned=*/false);
8417 }
8418 CurOffsets.push_back(Offset);
8419
8420 // Count
8421 const Expr *CountExpr = OASE->getLength();
8422 llvm::Value *Count = nullptr;
8423 if (!CountExpr) {
8424 // In Clang, once a higher dimension is an array section, all the lower
8425 // dimensions are constructed as array sections. However, for a case like
8426 // arr[0:2][2], Clang constructs the inner dimension as an array section,
8427 // though it actually is not in array-section form according to the spec.
8428 if (!OASE->getColonLocFirst().isValid() &&
8429 !OASE->getColonLocSecond().isValid()) {
8430 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8431 } else {
8432 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8433 // When the length is absent it defaults to ⌈(size −
8434 // lower-bound)/stride⌉, where size is the size of the array
8435 // dimension.
8436 const Expr *StrideExpr = OASE->getStride();
8437 llvm::Value *Stride =
8438 StrideExpr
8439 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8440 CGF.Int64Ty, /*isSigned=*/false)
8441 : nullptr;
8442 if (Stride)
8443 Count = CGF.Builder.CreateUDiv(
8444 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8445 else
8446 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8447 }
8448 } else {
8449 Count = CGF.EmitScalarExpr(CountExpr);
8450 }
8451 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8452 CurCounts.push_back(Count);
8453
8454 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8455 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8456 // Offset Count Stride
8457 // D0 0 1 4 (int) <- dummy dimension
8458 // D1 0 2 8 (2 * (1) * 4)
8459 // D2 1 2 20 (1 * (1 * 5) * 4)
8460 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8461 const Expr *StrideExpr = OASE->getStride();
8462 llvm::Value *Stride =
8463 StrideExpr
8464 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8465 CGF.Int64Ty, /*isSigned=*/false)
8466 : nullptr;
8467 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8468 if (Stride)
8469 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8470 else
8471 CurStrides.push_back(DimProd);
8472 if (DI != DimSizes.end())
8473 ++DI;
8474 }
8475
8476 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8477 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8478 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8479 }
8480
8481 /// Return the adjusted map modifiers if the declaration a capture refers to
8482 /// appears in a first-private clause. This is expected to be used only with
8483 /// directives that start with 'target'.
8484 OpenMPOffloadMappingFlags
8485 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8486 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8487
8488 // A firstprivate variable captured by reference will use only the
8489 // 'private ptr' and 'map to' flags. Return the right flags if the captured
8490 // declaration is known as firstprivate in this handler.
8491 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8492 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8493 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8494 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8495 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8496 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8497 }
8498 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8499 if (I != LambdasMap.end())
8500 // For map(to: lambda): use the user-specified map type.
8501 return getMapTypeBits(
8502 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8503 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8504 /*AddPtrFlag=*/false,
8505 /*AddIsTargetParamFlag=*/false,
8506 /*isNonContiguous=*/false);
8507 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8508 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8509 }
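// Hedged summary of the cases above:
//   firstprivate pointer  -> OMP_MAP_TO | OMP_MAP_PTR_AND_OBJ
//   firstprivate non-ptr  -> OMP_MAP_PRIVATE | OMP_MAP_TO
//   captured lambda       -> bits of its own map clause via getMapTypeBits
//   anything else         -> OMP_MAP_TO | OMP_MAP_FROM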
8510
8511 void getPlainLayout(const CXXRecordDecl *RD,
8512 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8513 bool AsBase) const {
8514 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8515
8516 llvm::StructType *St =
8517 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8518
8519 unsigned NumElements = St->getNumElements();
8520 llvm::SmallVector<
8521 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8522 RecordLayout(NumElements);
8523
8524 // Fill bases.
8525 for (const auto &I : RD->bases()) {
8526 if (I.isVirtual())
8527 continue;
8528
8529 QualType BaseTy = I.getType();
8530 const auto *Base = BaseTy->getAsCXXRecordDecl();
8531 // Ignore empty bases.
8532 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8533 CGF.getContext()
8534 .getASTRecordLayout(Base)
8535 .getNonVirtualSize()
8536 .isZero())
8537 continue;
8538
8539 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8540 RecordLayout[FieldIndex] = Base;
8541 }
8542 // Fill in virtual bases.
8543 for (const auto &I : RD->vbases()) {
8544 QualType BaseTy = I.getType();
8545 // Ignore empty bases.
8546 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8547 continue;
8548
8549 const auto *Base = BaseTy->getAsCXXRecordDecl();
8550 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8551 if (RecordLayout[FieldIndex])
8552 continue;
8553 RecordLayout[FieldIndex] = Base;
8554 }
8555 // Fill in all the fields.
8556 assert(!RD->isUnion() && "Unexpected union.");
8557 for (const auto *Field : RD->fields()) {
8558 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8559 // will fill in later.)
8560 if (!Field->isBitField() &&
8561 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8562 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8563 RecordLayout[FieldIndex] = Field;
8564 }
8565 }
8566 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8567 &Data : RecordLayout) {
8568 if (Data.isNull())
8569 continue;
8570 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8571 getPlainLayout(Base, Layout, /*AsBase=*/true);
8572 else
8573 Layout.push_back(cast<const FieldDecl *>(Data));
8574 }
8575 }
8576
8577 /// Returns the address corresponding to \p PointerExpr.
8578 static Address getAttachPtrAddr(const Expr *PointerExpr,
8579 CodeGenFunction &CGF) {
8580 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8581 Address AttachPtrAddr = Address::invalid();
8582
8583 if (auto *DRE = dyn_cast<DeclRefExpr>(PointerExpr)) {
8584 // If the pointer is a variable, we can use its address directly.
8585 AttachPtrAddr = CGF.EmitLValue(DRE).getAddress();
8586 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(PointerExpr)) {
8587 AttachPtrAddr =
8588 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/true).getAddress();
8589 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(PointerExpr)) {
8590 AttachPtrAddr = CGF.EmitLValue(ASE).getAddress();
8591 } else if (auto *ME = dyn_cast<MemberExpr>(PointerExpr)) {
8592 AttachPtrAddr = CGF.EmitMemberExpr(ME).getAddress();
8593 } else if (auto *UO = dyn_cast<UnaryOperator>(PointerExpr)) {
8594 assert(UO->getOpcode() == UO_Deref &&
8595 "Unexpected unary-operator on attach-ptr-expr");
8596 AttachPtrAddr = CGF.EmitLValue(UO).getAddress();
8597 }
8598 assert(AttachPtrAddr.isValid() &&
8599 "Failed to get address for attach pointer expression");
8600 return AttachPtrAddr;
8601 }
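// For intuition, the attach-ptr expressions handled above include forms
// like the following (assuming `int *p, **pp; S s, *ps;`):
//   map(p[0:n])     -> attach-ptr is the DeclRefExpr `p`
//   map(s.p[0:n])   -> attach-ptr is the MemberExpr `s.p`
//   map(ps->p[0:n]) -> attach-ptr is the MemberExpr `ps->p`
//   map((*pp)[0:n]) -> attach-ptr is the UnaryOperator `*pp`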
8602
8603 /// Get the address of the attach pointer, and a load from it, to get the
8604 /// pointee base address.
8605 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8606 /// contains invalid addresses if \p AttachPtrExpr is null.
8607 static std::pair<Address, Address>
8608 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8609 CodeGenFunction &CGF) {
8610
8611 if (!AttachPtrExpr)
8612 return {Address::invalid(), Address::invalid()};
8613
8614 Address AttachPtrAddr = getAttachPtrAddr(AttachPtrExpr, CGF);
8615 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8616
8617 QualType AttachPtrType =
8618 AttachPtrExpr->getType().getNonReferenceType();
8620 
8621 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8622 AttachPtrAddr, AttachPtrType->castAs<PointerType>());
8623 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8624
8625 return {AttachPtrAddr, AttachPteeBaseAddr};
8626 }
8627
8628 /// Returns whether an attach entry should be emitted for a map on
8629 /// \p MapBaseDecl on the directive \p CurDir.
8630 static bool
8631 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8632 CodeGenFunction &CGF,
8633 llvm::PointerUnion<const OMPExecutableDirective *,
8634 const OMPDeclareMapperDecl *>
8635 CurDir) {
8636 if (!PointerExpr)
8637 return false;
8638
8639 // Pointer attachment is needed at map-entering time or for declare
8640 // mappers.
8641 return isa<const OMPDeclareMapperDecl *>(CurDir) ||
8644 ->getDirectiveKind());
8645 }
8646
8647 /// Computes the attach-ptr expr for \p Components, and updates various maps
8648 /// with the information.
8649 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8650 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8651 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8652 /// AttachPtrExprMap.
8653 void collectAttachPtrExprInfo(
8654 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
8655 llvm::PointerUnion<const OMPExecutableDirective *,
8656 const OMPDeclareMapperDecl *>
8657 CurDir) {
8658
8659 OpenMPDirectiveKind CurDirectiveID =
8660 isa<const OMPDeclareMapperDecl *>(CurDir)
8661 ? OMPD_declare_mapper
8662 : cast<const OMPExecutableDirective *>(CurDir)->getDirectiveKind();
8663
8664 const auto &[AttachPtrExpr, Depth] =
8665 OMPClauseMappableExprCommon::findAttachPtrExpr(Components,
8666 CurDirectiveID);
8667
8668 AttachPtrComputationOrderMap.try_emplace(
8669 AttachPtrExpr, AttachPtrComputationOrderMap.size());
8670 AttachPtrComponentDepthMap.try_emplace(AttachPtrExpr, Depth);
8671 AttachPtrExprMap.try_emplace(Components, AttachPtrExpr);
8672 }
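// Sketch of the assumed bookkeeping: for `int *p; ... map(p[1:10])`,
// findAttachPtrExpr() identifies `p` as the attach-ptr at some component
// depth; the three maps then record its first-seen order, that depth, and
// the Components -> attach-ptr association later queried via
// getAttachPtrExpr().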
8673
8674 /// Generate all the base pointers, section pointers, sizes, map types, and
8675 /// mappers for the extracted mappable expressions (all included in \a
8676 // CombinedInfo). Also, for each item that relates to a device pointer, a
8677 /// pair of the relevant declaration and index where it occurs is appended to
8678 /// the device pointers info array.
8679 void generateAllInfoForClauses(
8680 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8681 llvm::OpenMPIRBuilder &OMPBuilder,
8682 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8683 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8684 // We have to process the component lists that relate to the same
8685 // declaration in a single chunk so that we can generate the map flags
8686 // correctly. Therefore, we organize all lists in a map.
8687 enum MapKind { Present, Allocs, Other, Total };
8688 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8689 SmallVector<SmallVector<MapInfo, 8>, 4>>
8690 Info;
8691
8692 // Helper function to fill the information map for the different supported
8693 // clauses.
8694 auto &&InfoGen =
8695 [&Info, &SkipVarSet](
8696 const ValueDecl *D, MapKind Kind,
8697 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8698 OpenMPMapClauseKind MapType,
8699 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8700 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8701 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8702 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8703 if (SkipVarSet.contains(D))
8704 return;
8705 auto It = Info.try_emplace(D, Total).first;
8706 It->second[Kind].emplace_back(
8707 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8708 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8709 };
8710
8711 for (const auto *Cl : Clauses) {
8712 const auto *C = dyn_cast<OMPMapClause>(Cl);
8713 if (!C)
8714 continue;
8715 MapKind Kind = Other;
8716 if (llvm::is_contained(C->getMapTypeModifiers(),
8717 OMPC_MAP_MODIFIER_present))
8718 Kind = Present;
8719 else if (C->getMapType() == OMPC_MAP_alloc)
8720 Kind = Allocs;
8721 const auto *EI = C->getVarRefs().begin();
8722 for (const auto L : C->component_lists()) {
8723 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8724 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8725 C->getMapTypeModifiers(), {},
8726 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8727 E);
8728 ++EI;
8729 }
8730 }
8731 for (const auto *Cl : Clauses) {
8732 const auto *C = dyn_cast<OMPToClause>(Cl);
8733 if (!C)
8734 continue;
8735 MapKind Kind = Other;
8736 if (llvm::is_contained(C->getMotionModifiers(),
8737 OMPC_MOTION_MODIFIER_present))
8738 Kind = Present;
8739 if (llvm::is_contained(C->getMotionModifiers(),
8740 OMPC_MOTION_MODIFIER_iterator)) {
8741 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8742 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8743 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8744 CGF.EmitVarDecl(*VD);
8745 }
8746 }
8747
8748 const auto *EI = C->getVarRefs().begin();
8749 for (const auto L : C->component_lists()) {
8750 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8751 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8752 C->isImplicit(), std::get<2>(L), *EI);
8753 ++EI;
8754 }
8755 }
8756 for (const auto *Cl : Clauses) {
8757 const auto *C = dyn_cast<OMPFromClause>(Cl);
8758 if (!C)
8759 continue;
8760 MapKind Kind = Other;
8761 if (llvm::is_contained(C->getMotionModifiers(),
8762 OMPC_MOTION_MODIFIER_present))
8763 Kind = Present;
8764 if (llvm::is_contained(C->getMotionModifiers(),
8765 OMPC_MOTION_MODIFIER_iterator)) {
8766 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8767 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8768 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8769 CGF.EmitVarDecl(*VD);
8770 }
8771 }
8772
8773 const auto *EI = C->getVarRefs().begin();
8774 for (const auto L : C->component_lists()) {
8775 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8776 C->getMotionModifiers(),
8777 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8778 *EI);
8779 ++EI;
8780 }
8781 }
8782
8783 // Look at the use_device_ptr and use_device_addr clause information and
8784 // mark the existing map entries as such. If there is no map information for
8785 // an entry in the use_device_ptr and use_device_addr list, we create one
8786 // with map type 'return_param' and zero size section. It is the user's
8787 // fault if that was not mapped before. If there is no map information, then
8788 // we defer the emission of that entry until all the maps for the same VD
8789 // have been handled.
8790 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8791
8792 auto &&UseDeviceDataCombinedInfoGen =
8793 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8794 CodeGenFunction &CGF, bool IsDevAddr) {
8795 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8796 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8797 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8798 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8799 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8800 // FIXME: For use_device_addr on array-sections, this should
8801 // be the starting address of the section.
8802 // e.g. int *p;
8803 // ... use_device_addr(p[3])
8804 // &p[0], &p[3], /*size=*/0, RETURN_PARAM
8805 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8806 UseDeviceDataCombinedInfo.Sizes.push_back(
8807 llvm::Constant::getNullValue(CGF.Int64Ty));
8808 UseDeviceDataCombinedInfo.Types.push_back(
8809 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8810 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8811 };
8812
8813 auto &&MapInfoGen =
8814 [&UseDeviceDataCombinedInfoGen](
8815 CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8816 OMPClauseMappableExprCommon::MappableExprComponentListRef
8817 Components,
8818 bool IsDevAddr, bool IEIsAttachPtrForDevAddr = false) {
8819 // We didn't find any match in our map information - generate a zero
8820 // size array section.
8821 llvm::Value *Ptr;
8822 if (IsDevAddr && !IEIsAttachPtrForDevAddr) {
8823 if (IE->isGLValue())
8824 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8825 else
8826 Ptr = CGF.EmitScalarExpr(IE);
8827 } else {
8828 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8829 }
8830 bool TreatDevAddrAsDevPtr = IEIsAttachPtrForDevAddr;
8831 // For the purpose of address-translation, treat something like the
8832 // following:
8833 // int *p;
8834 // ... use_device_addr(p[1])
8835 // as equivalent to:
8836 // ... use_device_ptr(p)
8837 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, /*IsDevAddr=*/IsDevAddr &&
8838 !TreatDevAddrAsDevPtr);
8839 };
8840
8841 auto &&IsMapInfoExist = [&Info, this](CodeGenFunction &CGF,
8842 const ValueDecl *VD, const Expr *IE,
8843 const Expr *DesiredAttachPtrExpr,
8844 bool IsDevAddr) -> bool {
8845 // We potentially have map information for this declaration already.
8846 // Look for the first set of components that refer to it. If found,
8847 // return true.
8848 // If the first component is a member expression, we have to look into
8849 // 'this', which maps to null in the map of map information. Otherwise
8850 // look directly for the information.
8851 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8852 if (It != Info.end()) {
8853 bool Found = false;
8854 for (auto &Data : It->second) {
8855 MapInfo *CI = nullptr;
8856 // We potentially have multiple maps for the same decl. We need to
8857 // only consider those for which the attach-ptr matches the desired
8858 // attach-ptr.
8859 auto *It = llvm::find_if(Data, [&](const MapInfo &MI) {
8860 if (MI.Components.back().getAssociatedDeclaration() != VD)
8861 return false;
8862
8863 const Expr *MapAttachPtr = getAttachPtrExpr(MI.Components);
8864 bool Match = AttachPtrComparator.areEqual(MapAttachPtr,
8865 DesiredAttachPtrExpr);
8866 return Match;
8867 });
8868
8869 if (It != Data.end())
8870 CI = &*It;
8871
8872 if (CI) {
8873 if (IsDevAddr) {
8874 CI->ForDeviceAddr = true;
8875 CI->ReturnDevicePointer = true;
8876 Found = true;
8877 break;
8878 } else {
8879 auto PrevCI = std::next(CI->Components.rbegin());
8880 const auto *VarD = dyn_cast<VarDecl>(VD);
8881 const Expr *AttachPtrExpr = getAttachPtrExpr(CI->Components);
8882 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8883 isa<MemberExpr>(IE) ||
8884 !VD->getType().getNonReferenceType()->isPointerType() ||
8885 PrevCI == CI->Components.rend() ||
8886 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8887 VarD->hasLocalStorage() ||
8888 (isa_and_nonnull<DeclRefExpr>(AttachPtrExpr) &&
8889 VD == cast<DeclRefExpr>(AttachPtrExpr)->getDecl())) {
8890 CI->ForDeviceAddr = IsDevAddr;
8891 CI->ReturnDevicePointer = true;
8892 Found = true;
8893 break;
8894 }
8895 }
8896 }
8897 }
8898 return Found;
8899 }
8900 return false;
8901 };
8902
8903 // Look at the use_device_ptr clause information and mark the existing map
8904 // entries as such. If there is no map information for an entry in the
8905 // use_device_ptr list, we create one with map type 'return_param' and zero
8906 // size section. It is the user's fault if that was not mapped before. If
8907 // there is no map information and the pointer is a struct member, we defer
8908 // the emission of that entry until the whole struct has been processed.
8909 for (const auto *Cl : Clauses) {
8910 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8911 if (!C)
8912 continue;
8913 for (const auto L : C->component_lists()) {
8914 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8915 std::get<1>(L);
8916 assert(!Components.empty() &&
8917 "Not expecting empty list of components!");
8918 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8919 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8920 const Expr *IE = Components.back().getAssociatedExpression();
8921 // For use_device_ptr, we match an existing map clause if its attach-ptr
8922 // is same as the use_device_ptr operand. e.g.
8923 // map expr | use_device_ptr expr | current behavior
8924 // ---------|---------------------|-----------------
8925 // p[1] | p | match
8926 // ps->a | ps | match
8927 // p | p | no match
8928 const Expr *UDPOperandExpr =
8929 Components.front().getAssociatedExpression();
8930 if (IsMapInfoExist(CGF, VD, IE,
8931 /*DesiredAttachPtrExpr=*/UDPOperandExpr,
8932 /*IsDevAddr=*/false))
8933 continue;
8934 MapInfoGen(CGF, IE, VD, Components, /*IsDevAddr=*/false);
8935 }
8936 }
8937
8938 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8939 for (const auto *Cl : Clauses) {
8940 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8941 if (!C)
8942 continue;
8943 for (const auto L : C->component_lists()) {
8944 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8945 std::get<1>(L);
8946 assert(!std::get<1>(L).empty() &&
8947 "Not expecting empty list of components!");
8948 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8949 if (!Processed.insert(VD).second)
8950 continue;
8951 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8952 // For use_device_addr, we match an existing map clause if the
8953 // use_device_addr operand's attach-ptr matches the map operand's
8954 // attach-ptr.
8955 // We could also restrict to only match cases when there is a full
8956 // match between the map/use_device_addr clause exprs, but that may be
8957 // unnecessary.
8958 //
8959 // map expr | use_device_addr expr | current | possible restrictive/
8960 // | | behavior | safer behavior
8961 // ---------|----------------------|-----------|-----------------------
8962 // p | p | match | match
8963 // p[0] | p[0] | match | match
8964 // p[0:1] | p[0] | match | no match
8965 // p[0:1] | p[2:1] | match | no match
8966 // p[1] | p[0] | match | no match
8967 // ps->a | ps->b | match | no match
8968 // p | p[0] | no match | no match
8969 // pp | pp[0][0] | no match | no match
8970 const Expr *UDAAttachPtrExpr = getAttachPtrExpr(Components);
8971 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8972 assert((!UDAAttachPtrExpr || UDAAttachPtrExpr == IE) &&
8973 "use_device_addr operand has an attach-ptr, but does not match "
8974 "last component's expr.");
8975 if (IsMapInfoExist(CGF, VD, IE,
8976 /*DesiredAttachPtrExpr=*/UDAAttachPtrExpr,
8977 /*IsDevAddr=*/true))
8978 continue;
8979 MapInfoGen(CGF, IE, VD, Components,
8980 /*IsDevAddr=*/true,
8981 /*IEIsAttachPtrForDevAddr=*/UDAAttachPtrExpr != nullptr);
8982 }
8983 }
8984
8985 for (const auto &Data : Info) {
8986 MapCombinedInfoTy CurInfo;
8987 const Decl *D = Data.first;
8988 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8989 // Group component lists by their AttachPtrExpr and process them in order
8990 // of increasing complexity (nullptr first, then simple expressions like
8991 // p, then more complex ones like p[0], etc.)
8992 //
8993 // This is similar to how generateInfoForCaptureFromClauseInfo handles
8994 // grouping for target constructs.
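 // [Editor's illustration, not part of the original source] For example,
 // given map(p), map(p[0]) and map(p[0]->b) on the same directive, the
 // attach-ptr exprs are nullptr, p and p[0] respectively, and the groups
 // are processed in that order.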
8995 SmallVector<std::pair<const Expr *, MapInfo>, 16> AttachPtrMapInfoPairs;
8996
8997 // First, collect all MapData entries with their attach-ptr exprs.
8998 for (const auto &M : Data.second) {
8999 for (const MapInfo &L : M) {
9000 assert(!L.Components.empty() &&
9001 "Not expecting declaration with no component lists.");
9002
9003 const Expr *AttachPtrExpr = getAttachPtrExpr(L.Components);
9004 AttachPtrMapInfoPairs.emplace_back(AttachPtrExpr, L);
9005 }
9006 }
9007
9008 // Next, sort by increasing order of their complexity.
9009 llvm::stable_sort(AttachPtrMapInfoPairs,
9010 [this](const auto &LHS, const auto &RHS) {
9011 return AttachPtrComparator(LHS.first, RHS.first);
9012 });
9013
9014 // And finally, process them all in order, grouping those with
9015 // equivalent attach-ptr exprs together.
9016 auto *It = AttachPtrMapInfoPairs.begin();
9017 while (It != AttachPtrMapInfoPairs.end()) {
9018 const Expr *AttachPtrExpr = It->first;
9019
9020 SmallVector<MapInfo, 8> GroupLists;
9021 while (It != AttachPtrMapInfoPairs.end() &&
9022 (It->first == AttachPtrExpr ||
9023 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9024 GroupLists.push_back(It->second);
9025 ++It;
9026 }
9027 assert(!GroupLists.empty() && "GroupLists should not be empty");
9028
9029 StructRangeInfoTy PartialStruct;
9030 AttachInfoTy AttachInfo;
9031 MapCombinedInfoTy GroupCurInfo;
9032 // Current group's struct base information:
9033 MapCombinedInfoTy GroupStructBaseCurInfo;
9034 for (const MapInfo &L : GroupLists) {
9035 // Remember the current base pointer index.
9036 unsigned CurrentBasePointersIdx = GroupCurInfo.BasePointers.size();
9037 unsigned StructBasePointersIdx =
9038 GroupStructBaseCurInfo.BasePointers.size();
9039
9040 GroupCurInfo.NonContigInfo.IsNonContiguous =
9041 L.Components.back().isNonContiguous();
9042 generateInfoForComponentList(
9043 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
9044 GroupCurInfo, GroupStructBaseCurInfo, PartialStruct, AttachInfo,
9045 /*IsFirstComponentList=*/false, L.IsImplicit,
9046 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
9047 L.VarRef, /*OverlappedElements*/ {});
9048
9049 // If this entry relates to a device pointer, set the relevant
9050 // declaration and add the 'return pointer' flag.
9051 if (L.ReturnDevicePointer) {
9052 // Check whether a value was added to either GroupCurInfo or
9053 // GroupStructBaseCurInfo and error if no value was added to either
9054 // of them:
9055 assert((CurrentBasePointersIdx < GroupCurInfo.BasePointers.size() ||
9056 StructBasePointersIdx <
9057 GroupStructBaseCurInfo.BasePointers.size()) &&
9058 "Unexpected number of mapped base pointers.");
9059
9060 // Choose a base pointer index which is always valid:
9061 const ValueDecl *RelevantVD =
9062 L.Components.back().getAssociatedDeclaration();
9063 assert(RelevantVD &&
9064 "No relevant declaration related to device pointer??");
9065
9066 // If GroupStructBaseCurInfo has been updated this iteration then
9067 // work on the first new entry added to it i.e. make sure that when
9068 // multiple values are added to any of the lists, the first value
9069 // added is being modified by the assignments below (not the last
9070 // value added).
9071 if (StructBasePointersIdx <
9072 GroupStructBaseCurInfo.BasePointers.size()) {
9073 GroupStructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
9074 RelevantVD;
9075 GroupStructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
9076 L.ForDeviceAddr ? DeviceInfoTy::Address
9077 : DeviceInfoTy::Pointer;
9078 GroupStructBaseCurInfo.Types[StructBasePointersIdx] |=
9079 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
9080 } else {
9081 GroupCurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
9082 GroupCurInfo.DevicePointers[CurrentBasePointersIdx] =
9083 L.ForDeviceAddr ? DeviceInfoTy::Address
9084 : DeviceInfoTy::Pointer;
9085 GroupCurInfo.Types[CurrentBasePointersIdx] |=
9086 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
9087 }
9088 }
9089 }
9090
9091 // Unify entries in one list making sure the struct mapping precedes the
9092 // individual fields:
9093 MapCombinedInfoTy GroupUnionCurInfo;
9094 GroupUnionCurInfo.append(GroupStructBaseCurInfo);
9095 GroupUnionCurInfo.append(GroupCurInfo);
9096
9097 // If there is an entry in PartialStruct it means we have a struct with
9098 // individual members mapped. Emit an extra combined entry.
9099 if (PartialStruct.Base.isValid()) {
9100 GroupUnionCurInfo.NonContigInfo.Dims.push_back(0);
9101 emitCombinedEntry(
9102 CurInfo, GroupUnionCurInfo.Types, PartialStruct, AttachInfo,
9103 /*IsMapThis*/ !VD, OMPBuilder, VD,
9104 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size(),
9105 /*NotTargetParams=*/true);
9106 }
9107
9108 // Append this group's results to the overall CurInfo in the correct
9109 // order: combined-entry -> original-field-entries -> attach-entry
9110 CurInfo.append(GroupUnionCurInfo);
9111 if (AttachInfo.isValid())
9112 emitAttachEntry(CGF, CurInfo, AttachInfo);
9113 }
9114
9115 // We need to append the results of this capture to what we already have.
9116 CombinedInfo.append(CurInfo);
9117 }
9118 // Append data for use_device_ptr/addr clauses.
9119 CombinedInfo.append(UseDeviceDataCombinedInfo);
9120 }
9121
9122public:
9123 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
9124 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
9125 // Extract firstprivate clause information.
9126 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
9127 for (const auto *D : C->varlist())
9128 FirstPrivateDecls.try_emplace(
9129 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
9130 // Extract implicit firstprivates from uses_allocators clauses.
9131 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
9132 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
9133 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
9134 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
9135 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
9136 /*Implicit=*/true);
9137 else if (const auto *VD = dyn_cast<VarDecl>(
9138 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
9139 ->getDecl()))
9140 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
9141 }
9142 }
9143 // Extract defaultmap clause information.
9144 for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
9145 if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
9146 DefaultmapFirstprivateKinds.insert(C->getDefaultmapKind());
9147 // Extract device pointer clause information.
9148 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9149 for (auto L : C->component_lists())
9150 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
9151 // Extract device addr clause information.
9152 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9153 for (auto L : C->component_lists())
9154 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
9155 // Extract map information.
9156 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
9157 if (C->getMapType() != OMPC_MAP_to)
9158 continue;
9159 for (auto L : C->component_lists()) {
9160 const ValueDecl *VD = std::get<0>(L);
9161 const auto *RD = VD ? VD->getType()
9162 .getCanonicalType()
9163 .getNonReferenceType()
9164 ->getAsCXXRecordDecl()
9165 : nullptr;
9166 if (RD && RD->isLambda())
9167 LambdasMap.try_emplace(std::get<0>(L), C);
9168 }
9169 }
9170
9171 auto CollectAttachPtrExprsForClauseComponents = [this](const auto *C) {
9172 for (auto L : C->component_lists()) {
9173 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
9174 std::get<1>(L);
9175 if (!Components.empty())
9176 collectAttachPtrExprInfo(Components, CurDir);
9177 }
9178 };
9179
9180 // Populate the AttachPtrExprMap for all component lists from map-related
9181 // clauses.
9182 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>())
9183 CollectAttachPtrExprsForClauseComponents(C);
9184 for (const auto *C : Dir.getClausesOfKind<OMPToClause>())
9185 CollectAttachPtrExprsForClauseComponents(C);
9186 for (const auto *C : Dir.getClausesOfKind<OMPFromClause>())
9187 CollectAttachPtrExprsForClauseComponents(C);
9188 for (const auto *C : Dir.getClausesOfKind<OMPUseDevicePtrClause>())
9189 CollectAttachPtrExprsForClauseComponents(C);
9190 for (const auto *C : Dir.getClausesOfKind<OMPUseDeviceAddrClause>())
9191 CollectAttachPtrExprsForClauseComponents(C);
9192 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9193 CollectAttachPtrExprsForClauseComponents(C);
9194 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9195 CollectAttachPtrExprsForClauseComponents(C);
9196 }
9197
9198 /// Constructor for the declare mapper directive.
9199 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
9200 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9201
9202 /// Generate code for the combined entry if we have a partially mapped struct
9203 /// and take care of the mapping flags of the arguments corresponding to
9204 /// individual struct members.
9205 /// If a valid \p AttachInfo exists, its pointee addr will be updated to point
9206 /// to the combined-entry's begin address, if emitted.
9207 /// \p PartialStruct contains attach base-pointer information.
9208 /// No combined entry is emitted if there is only a single map entry that
9209 /// does not have the MEMBER_OF flag and is not an array section.
9210 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
9211 MapFlagsArrayTy &CurTypes,
9212 const StructRangeInfoTy &PartialStruct,
9213 AttachInfoTy &AttachInfo, bool IsMapThis,
9214 llvm::OpenMPIRBuilder &OMPBuilder, const ValueDecl *VD,
9215 unsigned OffsetForMemberOfFlag,
9216 bool NotTargetParams) const {
9217 if (CurTypes.size() == 1 &&
9218 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
9219 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
9220 !PartialStruct.IsArraySection)
9221 return;
9222 Address LBAddr = PartialStruct.LowestElem.second;
9223 Address HBAddr = PartialStruct.HighestElem.second;
9224 if (PartialStruct.HasCompleteRecord) {
9225 LBAddr = PartialStruct.LB;
9226 HBAddr = PartialStruct.LB;
9227 }
9228 CombinedInfo.Exprs.push_back(VD);
9229 // Base is the base of the struct
9230 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9231 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9232 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9233 // Pointer is the address of the lowest element
9234 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
9235 const CXXMethodDecl *MD =
9236 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
9237 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
9238 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
9239 // There should not be a mapper for a combined entry.
9240 if (HasBaseClass) {
9241 // OpenMP 5.2 148:21:
9242 // If the target construct is within a class non-static member function,
9243 // and a variable is an accessible data member of the object for which the
9244 // non-static data member function is invoked, the variable is treated as
9245 // if the this[:1] expression had appeared in a map clause with a map-type
9246 // of tofrom.
9247 // Emit this[:1]
9248 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9249 QualType Ty = MD->getFunctionObjectParameterType();
9250 llvm::Value *Size =
9251 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
9252 /*isSigned=*/true);
9253 CombinedInfo.Sizes.push_back(Size);
9254 } else {
9255 CombinedInfo.Pointers.push_back(LB);
9256 // Size is (addr of {highest+1} element) - (addr of lowest element)
9257 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
9258 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
9259 HBAddr.getElementType(), HB, /*Idx0=*/1);
9260 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
9261 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
9262 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
9263 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9264 /*isSigned=*/false);
9265 CombinedInfo.Sizes.push_back(Size);
9266 }
9267 CombinedInfo.Mappers.push_back(nullptr);
9268 // Map type is always TARGET_PARAM when generating info for captures.
9269 CombinedInfo.Types.push_back(
9270 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
9271 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
9272 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
9273 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9274 // If any element has the present modifier, then make sure the runtime
9275 // doesn't attempt to allocate the struct.
9276 if (CurTypes.end() !=
9277 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9278 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9279 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9280 }))
9281 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
9282 // Remove TARGET_PARAM flag from the first element
9283 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9284 // If any element has the ompx_hold modifier, then make sure the runtime
9285 // uses the hold reference count for the struct as a whole so that it won't
9286 // be unmapped by an extra dynamic reference count decrement. Add it to all
9287 // elements as well so the runtime knows which reference count to check
9288 // when determining whether it's time for device-to-host transfers of
9289 // individual elements.
9290 if (CurTypes.end() !=
9291 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9292 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9293 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
9294 })) {
9295 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9296 for (auto &M : CurTypes)
9297 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9298 }
9299
9300 // All other current entries will be MEMBER_OF the combined entry
9301 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9302 // 0xFFFF in the MEMBER_OF field, or ATTACH entries since they are expected
9303 // to be handled by themselves, after all other maps).
9304 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
9305 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
9306 for (auto &M : CurTypes)
9307 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
9308
9309 // When we are emitting a combined entry and there are pending attachments
9310 // to be done, we do them to the begin address of the combined entry, i.e.
9311 // if AttachInfo is valid, we update the pointee address to point to the
9312 // begin address of the combined entry. Note that this means only one
9313 // attachment per combined entry will be done. So, for instance, if we
9314 // have:
9315 // S *ps;
9316 // ... map(ps->a, ps->b)
9317 // we still get a single ATTACH entry:
9318 //
9319 // &ps[0], &ps->a, sizeof(ps->a to ps->b), ALLOC // combined-entry
9320 // &ps[0], &ps->a, sizeof(ps->a), TO | FROM
9321 // &ps[0], &ps->b, sizeof(ps->b), TO | FROM
9322 // &ps, &ps->a, sizeof(void*), ATTACH // Use combined-entry's LB
9323
9324 if (AttachInfo.isValid())
9325 AttachInfo.AttachPteeAddr = LBAddr;
9326 }
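// [Editor's note on emitCombinedEntry above, not part of the original
// source] Illustration: for map(s.a, s.c) where struct S has members a, b
// and c, the combined entry's base is &s, its pointer is &s.a, and its size
// is (char *)(&s.c + 1) - (char *)&s.a; the entries for s.a and s.c are
// then marked MEMBER_OF the combined entry.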
9327
9328 /// Generate all the base pointers, section pointers, sizes, map types, and
9329 /// mappers for the extracted mappable expressions (all included in \a
9330 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9331 /// pair of the relevant declaration and index where it occurs is appended to
9332 /// the device pointers info array.
9333 void generateAllInfo(
9334 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9335 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9336 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9337 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9338 "Expect a executable directive");
9339 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9340 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9341 SkipVarSet);
9342 }
9343
9344 /// Generate all the base pointers, section pointers, sizes, map types, and
9345 /// mappers for the extracted map clauses of a user-defined mapper (all
9346 /// included in \a CombinedInfo).
9347 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9348 llvm::OpenMPIRBuilder &OMPBuilder) const {
9349 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9350 "Expect a declare mapper directive");
9351 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
9352 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
9353 OMPBuilder);
9354 }
9355
9356 /// Emit capture info for lambdas for variables captured by reference.
9357 void generateInfoForLambdaCaptures(
9358 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9359 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9360 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9361 const auto *RD = VDType->getAsCXXRecordDecl();
9362 if (!RD || !RD->isLambda())
9363 return;
9364 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9365 CGF.getContext().getDeclAlign(VD));
9366 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9367 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9368 FieldDecl *ThisCapture = nullptr;
9369 RD->getCaptureFields(Captures, ThisCapture);
9370 if (ThisCapture) {
9371 LValue ThisLVal =
9372 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9373 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9374 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9375 VDLVal.getPointer(CGF));
9376 CombinedInfo.Exprs.push_back(VD);
9377 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9378 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9379 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9380 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9381 CombinedInfo.Sizes.push_back(
9382 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9383 CGF.Int64Ty, /*isSigned=*/true));
9384 CombinedInfo.Types.push_back(
9385 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9386 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9387 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9388 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9389 CombinedInfo.Mappers.push_back(nullptr);
9390 }
9391 for (const LambdaCapture &LC : RD->captures()) {
9392 if (!LC.capturesVariable())
9393 continue;
9394 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
9395 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9396 continue;
9397 auto It = Captures.find(VD);
9398 assert(It != Captures.end() && "Found lambda capture without field.");
9399 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9400 if (LC.getCaptureKind() == LCK_ByRef) {
9401 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9402 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9403 VDLVal.getPointer(CGF));
9404 CombinedInfo.Exprs.push_back(VD);
9405 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9406 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9407 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9408 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9409 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9410 CGF.getTypeSize(
9411 VD->getType().getCanonicalType().getNonReferenceType()),
9412 CGF.Int64Ty, /*isSigned=*/true));
9413 } else {
9414 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9415 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9416 VDLVal.getPointer(CGF));
9417 CombinedInfo.Exprs.push_back(VD);
9418 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9419 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9420 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9421 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9422 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9423 }
9424 CombinedInfo.Types.push_back(
9425 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9426 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9427 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9428 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9429 CombinedInfo.Mappers.push_back(nullptr);
9430 }
9431 }
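// [Editor's note on generateInfoForLambdaCaptures above, not part of the
// original source] Illustration: for
//   int x; auto l = [&x]() { ++x; };
//   #pragma omp target map(to: l)
// the by-reference capture of x produces an entry whose base pointer is the
// capture field inside l, whose pointer is the address of x, and whose type
// is PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT.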
9432
9433 /// Set correct indices for lambda captures.
9434 void adjustMemberOfForLambdaCaptures(
9435 llvm::OpenMPIRBuilder &OMPBuilder,
9436 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9437 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9438 MapFlagsArrayTy &Types) const {
9439 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9440 // Set correct member_of idx for all implicit lambda captures.
9441 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9442 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9443 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9444 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9445 continue;
9446 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
9447 assert(BasePtr && "Unable to find base lambda address.");
9448 int TgtIdx = -1;
9449 for (unsigned J = I; J > 0; --J) {
9450 unsigned Idx = J - 1;
9451 if (Pointers[Idx] != BasePtr)
9452 continue;
9453 TgtIdx = Idx;
9454 break;
9455 }
9456 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9457 // All other current entries will be MEMBER_OF the combined entry
9458 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9459 // 0xFFFF in the MEMBER_OF field).
9460 OpenMPOffloadMappingFlags MemberOfFlag =
9461 OMPBuilder.getMemberOfFlag(TgtIdx);
9462 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9463 }
9464 }
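// [Editor's note on adjustMemberOfForLambdaCaptures above, not part of the
// original source] Illustration: if entry 2 maps a lambda object and a
// later entry's base pointer is a capture field inside that object, the
// later entry's MEMBER_OF bits are rewritten to refer to index 2, its
// parent lambda entry.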
9465
9466 /// Populate component lists for non-lambda captured variables from map,
9467 /// is_device_ptr and has_device_addr clause info.
9468 void populateComponentListsForNonLambdaCaptureFromClauses(
9469 const ValueDecl *VD, MapDataArrayTy &DeclComponentLists,
9470 SmallVectorImpl<
9471 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9472 &StorageForImplicitlyAddedComponentLists) const {
9473 if (VD && LambdasMap.count(VD))
9474 return;
9475
9476 // For member fields list in is_device_ptr, store it in
9477 // DeclComponentLists for generating components info.
9478 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9479 auto It = DevPointersMap.find(VD);
9480 if (It != DevPointersMap.end())
9481 for (const auto &MCL : It->second)
9482 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9483 /*IsImplicit=*/true, nullptr,
9484 nullptr);
9485 auto I = HasDevAddrsMap.find(VD);
9486 if (I != HasDevAddrsMap.end())
9487 for (const auto &MCL : I->second)
9488 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9489 /*IsImplicit=*/true, nullptr,
9490 nullptr);
9491 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9492 "Expect a executable directive");
9493 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9494 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9495 const auto *EI = C->getVarRefs().begin();
9496 for (const auto L : C->decl_component_lists(VD)) {
9497 const ValueDecl *VDecl, *Mapper;
9498 // The expression is not correct if the mapping is implicit.
9499 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9500 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9501 std::tie(VDecl, Components, Mapper) = L;
9502 assert(VDecl == VD && "We got information for the wrong declaration??");
9503 assert(!Components.empty() &&
9504 "Not expecting declaration with no component lists.");
9505 DeclComponentLists.emplace_back(Components, C->getMapType(),
9506 C->getMapTypeModifiers(),
9507 C->isImplicit(), Mapper, E);
9508 ++EI;
9509 }
9510 }
9511
9512 // For the target construct, if there's a map with a base-pointer that's
9513 // a member of an implicitly captured struct or of the current class, we
9514 // need to emit an implicit map on the pointer.
9515 if (isOpenMPTargetExecutionDirective(CurExecDir->getDirectiveKind()))
9516 addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9517 VD, DeclComponentLists, StorageForImplicitlyAddedComponentLists);
9518
9519 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9520 const MapData &RHS) {
9521 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9522 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9523 bool HasPresent =
9524 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9525 bool HasAllocs = MapType == OMPC_MAP_alloc;
9526 MapModifiers = std::get<2>(RHS);
9527 MapType = std::get<1>(LHS);
9528 bool HasPresentR =
9529 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9530 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9531 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9532 });
9533 }
9534
9535 /// On a target construct, if there's an implicit map on a struct, or that of
9536 /// this[:], and an explicit map with a member of that struct/class as the
9537 /// base-pointer, we need to make sure that base-pointer is implicitly mapped,
9538 /// to make sure we don't map the full struct/class. For example:
9539 ///
9540 /// \code
9541 /// struct S {
9542 /// int dummy[10000];
9543 /// int *p;
9544 /// void f1() {
9545 /// #pragma omp target map(p[0:1])
9546 /// (void)this;
9547 /// }
9548 /// }; S s;
9549 ///
9550 /// void f2() {
9551 /// #pragma omp target map(s.p[0:10])
9552 /// (void)s;
9553 /// }
9554 /// \endcode
9555 ///
9556 /// Only `this->p` and `s.p` should be mapped in the two cases above.
9557 //
9558 // OpenMP 6.0: 7.9.6 map clause, pg 285
9559 // If a list item with an implicitly determined data-mapping attribute does
9560 // not have any corresponding storage in the device data environment prior to
9561 // a task encountering the construct associated with the map clause, and one
9562 // or more contiguous parts of the original storage are either list items or
9563 // base pointers to list items that are explicitly mapped on the construct,
9564 // only those parts of the original storage will have corresponding storage in
9565 // the device data environment as a result of the map clauses on the
9566 // construct.
9567 void addImplicitMapForAttachPtrBaseIfMemberOfCapturedVD(
9568 const ValueDecl *CapturedVD, MapDataArrayTy &DeclComponentLists,
9569 SmallVectorImpl<
9570 SmallVector<OMPClauseMappableExprCommon::MappableComponent, 8>>
9571 &ComponentVectorStorage) const {
9572 bool IsThisCapture = CapturedVD == nullptr;
9573
9574 for (const auto &ComponentsAndAttachPtr : AttachPtrExprMap) {
9575 OMPClauseMappableExprCommon::MappableExprComponentListRef
9576 ComponentsWithAttachPtr = ComponentsAndAttachPtr.first;
9577 const Expr *AttachPtrExpr = ComponentsAndAttachPtr.second;
9578 if (!AttachPtrExpr)
9579 continue;
9580
9581 const auto *ME = dyn_cast<MemberExpr>(AttachPtrExpr);
9582 if (!ME)
9583 continue;
9584
9585 const Expr *Base = ME->getBase()->IgnoreParenImpCasts();
9586
9587 // If we are handling a "this" capture, then we are looking for
9588 // attach-ptrs of form `this->p`, either explicitly or implicitly.
9589 if (IsThisCapture && !ME->isImplicitCXXThis() && !isa<CXXThisExpr>(Base))
9590 continue;
9591
9592 if (!IsThisCapture && (!isa<DeclRefExpr>(Base) ||
9593 cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
9594 continue;
9595
9596 // For non-this captures, we are looking for attach-ptrs of the form
9597 // `s.p`, i.e. a non-arrow member access whose base is a DeclRefExpr of
9598 // the captured variable.
9599 if (!IsThisCapture && (ME->isArrow() || !isa<DeclRefExpr>(Base) ||
9600 cast<DeclRefExpr>(Base)->getDecl() != CapturedVD))
9601 continue;
9602
9603 // Check if we have an existing map on either:
9604 // this[:], s, this->p, or s.p, in which case, we don't need to add
9605 // an implicit one for the attach-ptr s.p/this->p.
9606 bool FoundExistingMap = false;
9607 for (const MapData &ExistingL : DeclComponentLists) {
9608 OMPClauseMappableExprCommon::MappableExprComponentListRef
9609 ExistingComponents = std::get<0>(ExistingL);
9610
9611 if (ExistingComponents.empty())
9612 continue;
9613
9614 // Get the first component's expression of the existing map.
9615 const auto &FirstComponent = ExistingComponents.front();
9616 const Expr *FirstExpr = FirstComponent.getAssociatedExpression();
9617
9618 if (!FirstExpr)
9619 continue;
9620
9621 // First check if we have a map like map(this->p) or map(s.p).
9622 if (AttachPtrComparator.areEqual(FirstExpr, AttachPtrExpr)) {
9623 FoundExistingMap = true;
9624 break;
9625 }
9626
9627 // Check if we have a map like this[0:1]
9628 if (IsThisCapture) {
9629 if (const auto *OASE = dyn_cast<ArraySectionExpr>(FirstExpr)) {
9630 if (isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts())) {
9631 FoundExistingMap = true;
9632 break;
9633 }
9634 }
9635 continue;
9636 }
9637
9638 // When the attach-ptr is something like `s.p`, check if
9639 // `s` itself is mapped explicitly.
9640 if (const auto *DRE = dyn_cast<DeclRefExpr>(FirstExpr)) {
9641 if (DRE->getDecl() == CapturedVD) {
9642 FoundExistingMap = true;
9643 break;
9644 }
9645 }
9646 }
9647
9648 if (FoundExistingMap)
9649 continue;
9650
9651 // If no base map is found, we need to create an implicit map for the
9652 // attach-pointer expr.
9653
9654 ComponentVectorStorage.emplace_back();
9655 auto &AttachPtrComponents = ComponentVectorStorage.back();
9656
9658 bool SeenAttachPtrComponent = false;
9659 // For creating a map on the attach-ptr `s.p/this->p`, we copy all
9660 // components from the component-list which has `s.p/this->p`
9661 // as the attach-ptr, starting from the component which matches
9662 // `s.p/this->p`. This way, we'll have component-lists of
9663 // `s.p` -> `s`, and `this->p` -> `this`.
9664 for (size_t i = 0; i < ComponentsWithAttachPtr.size(); ++i) {
9665 const auto &Component = ComponentsWithAttachPtr[i];
9666 const Expr *ComponentExpr = Component.getAssociatedExpression();
9667
9668 if (!SeenAttachPtrComponent && ComponentExpr != AttachPtrExpr)
9669 continue;
9670 SeenAttachPtrComponent = true;
9671
9672 AttachPtrComponents.emplace_back(Component.getAssociatedExpression(),
9673 Component.getAssociatedDeclaration(),
9674 Component.isNonContiguous());
9675 }
9676 assert(!AttachPtrComponents.empty() &&
9677 "Could not populate component-lists for mapping attach-ptr");
9678
9679 DeclComponentLists.emplace_back(
9680 AttachPtrComponents, OMPC_MAP_tofrom, Unknown,
9681 /*IsImplicit=*/true, /*mapper=*/nullptr, AttachPtrExpr);
9682 }
9683 }
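// [Editor's note on the function above, not part of the original source]
// Illustration: for
//   #pragma omp target map(s.p[0:10])
// with no explicit map of s itself, an implicit tofrom map with component
// list `s.p` -> `s` is added, so only the pointer member s.p (not all of s)
// gets mapped along with the pointee section.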
9684
9685 /// For a capture that has an associated clause, generate the base pointers,
9686 /// section pointers, sizes, map types, and mappers (all included in
9687 /// \a CurCaptureVarInfo).
9688 void generateInfoForCaptureFromClauseInfo(
9689 const MapDataArrayTy &DeclComponentListsFromClauses,
9690 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
9691 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9692 unsigned OffsetForMemberOfFlag) const {
9693 assert(!Cap->capturesVariableArrayType() &&
9694 "Not expecting to generate map info for a variable array type!");
9695
9696 // We need to know when we are generating information for the first component.
9697 const ValueDecl *VD = Cap->capturesThis()
9698 ? nullptr
9699 : Cap->getCapturedVar()->getCanonicalDecl();
9700
9701 // For map(to: lambda): skip it here; it is processed in
9702 // generateDefaultMapInfo.
9703 if (LambdasMap.count(VD))
9704 return;
9705
9706 // If this declaration appears in an is_device_ptr clause we just have to
9707 // pass the pointer by value. If it is a reference to a declaration, we
9708 // just pass its value.
9709 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
9710 CurCaptureVarInfo.Exprs.push_back(VD);
9711 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
9712 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
9713 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
9714 CurCaptureVarInfo.Pointers.push_back(Arg);
9715 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9716 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9717 /*isSigned=*/true));
9718 CurCaptureVarInfo.Types.push_back(
9719 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9720 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9721 CurCaptureVarInfo.Mappers.push_back(nullptr);
9722 return;
9723 }
9724
9725 auto GenerateInfoForComponentLists =
9726 [&](ArrayRef<MapData> DeclComponentListsFromClauses,
9727 bool IsEligibleForTargetParamFlag) {
9728 MapCombinedInfoTy CurInfoForComponentLists;
9729 StructRangeInfoTy PartialStruct;
9730 AttachInfoTy AttachInfo;
9731
9732 if (DeclComponentListsFromClauses.empty())
9733 return;
9734
9735 generateInfoForCaptureFromComponentLists(
9736 VD, DeclComponentListsFromClauses, CurInfoForComponentLists,
9737 PartialStruct, AttachInfo, IsEligibleForTargetParamFlag);
9738
9739 // If there is an entry in PartialStruct it means we have a
9740 // struct with individual members mapped. Emit an extra combined
9741 // entry.
9742 if (PartialStruct.Base.isValid()) {
9743 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
9744 emitCombinedEntry(
9745 CurCaptureVarInfo, CurInfoForComponentLists.Types,
9746 PartialStruct, AttachInfo, Cap->capturesThis(), OMPBuilder,
9747 /*VD=*/nullptr, OffsetForMemberOfFlag,
9748 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
9749 }
9750
9751 // We do the appends to get the entries in the following order:
9752 // combined-entry -> individual-field-entries -> attach-entry,
9753 CurCaptureVarInfo.append(CurInfoForComponentLists);
9754 if (AttachInfo.isValid())
9755 emitAttachEntry(CGF, CurCaptureVarInfo, AttachInfo);
9756 };
9757
9758 // Group component lists by their AttachPtrExpr and process them in order
9759 // of increasing complexity (nullptr first, then simple expressions like p,
9760 // then more complex ones like p[0], etc.)
9761 //
9762 // This ensures that we:
9763 // * handle maps that can contribute towards setting the kernel argument,
9764 // (e.g. map(ps), or map(ps[0])), before any that cannot (e.g. ps->pt->d).
9765 // * allocate a single contiguous storage for all exprs with the same
9766 // captured var and having the same attach-ptr.
9767 //
9768 // Example: The map clauses below should be handled grouped together based
9769 // on their attachable-base-pointers:
9770 // map-clause | attachable-base-pointer
9771 // --------------------------+------------------------
9772 // map(p, ps) | nullptr
9773 // map(p[0]) | p
9774 // map(p[0]->b, p[0]->c) | p[0]
9775 // map(ps->d, ps->e, ps->pt) | ps
9776 // map(ps->pt->d, ps->pt->e) | ps->pt
9777
9778 // First, collect all MapData entries with their attach-ptr exprs.
9779 SmallVector<std::pair<const Expr *, MapData>, 16> AttachPtrMapDataPairs;
9780
9781 for (const MapData &L : DeclComponentListsFromClauses) {
9782 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
9783 std::get<0>(L);
9784 const Expr *AttachPtrExpr = getAttachPtrExpr(Components);
9785 AttachPtrMapDataPairs.emplace_back(AttachPtrExpr, L);
9786 }
9787
9788 // Next, sort by increasing order of their complexity.
9789 llvm::stable_sort(AttachPtrMapDataPairs,
9790 [this](const auto &LHS, const auto &RHS) {
9791 return AttachPtrComparator(LHS.first, RHS.first);
9792 });
9793
9794 bool NoDefaultMappingDoneForVD = CurCaptureVarInfo.BasePointers.empty();
9795 bool IsFirstGroup = true;
9796
9797 // And finally, process them all in order, grouping those with
9798 // equivalent attach-ptr exprs together.
9799 auto *It = AttachPtrMapDataPairs.begin();
9800 while (It != AttachPtrMapDataPairs.end()) {
9801 const Expr *AttachPtrExpr = It->first;
9802
9803 MapDataArrayTy GroupLists;
9804 while (It != AttachPtrMapDataPairs.end() &&
9805 (It->first == AttachPtrExpr ||
9806 AttachPtrComparator.areEqual(It->first, AttachPtrExpr))) {
9807 GroupLists.push_back(It->second);
9808 ++It;
9809 }
9810 assert(!GroupLists.empty() && "GroupLists should not be empty");
9811
9812 // Determine if this group of component-lists is eligible for the
9813 // TARGET_PARAM flag. Only the first group processed should be eligible,
9814 // and only if no default mapping was done.
9815 bool IsEligibleForTargetParamFlag =
9816 IsFirstGroup && NoDefaultMappingDoneForVD;
9817
9818 GenerateInfoForComponentLists(GroupLists, IsEligibleForTargetParamFlag);
9819 IsFirstGroup = false;
9820 }
9821 }
9822
9823 /// Generate the base pointers, section pointers, sizes, map types, and
9824 /// mappers associated to \a DeclComponentLists for a given capture
9825 /// \a VD (all included in \a CurComponentListInfo).
9826 void generateInfoForCaptureFromComponentLists(
9827 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
9828 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
9829 AttachInfoTy &AttachInfo, bool IsListEligibleForTargetParamFlag) const {
9830 // Find overlapping elements (including the offset from the base element).
9831 llvm::SmallDenseMap<
9832 const MapData *,
9833 llvm::SmallVector<
9834 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9835 4>
9836 OverlappedData;
9837 size_t Count = 0;
9838 for (const MapData &L : DeclComponentLists) {
9839 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9840 OpenMPMapClauseKind MapType;
9841 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9842 bool IsImplicit;
9843 const ValueDecl *Mapper;
9844 const Expr *VarRef;
9845 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9846 L;
9847 ++Count;
9848 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
9849 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9850 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9851 VarRef) = L1;
9852 auto CI = Components.rbegin();
9853 auto CE = Components.rend();
9854 auto SI = Components1.rbegin();
9855 auto SE = Components1.rend();
9856 for (; CI != CE && SI != SE; ++CI, ++SI) {
9857 if (CI->getAssociatedExpression()->getStmtClass() !=
9858 SI->getAssociatedExpression()->getStmtClass())
9859 break;
9860 // Are we dealing with different variables/fields?
9861 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9862 break;
9863 }
9864 // We found an overlap if, for at least one of the component lists, we
9865 // reached the head of that list.
9866 if (CI == CE || SI == SE) {
9867 // Ignore it if it is the same component.
9868 if (CI == CE && SI == SE)
9869 continue;
9870 const auto It = (SI == SE) ? CI : SI;
9871 // If one component is a pointer and another one is a kind of
9872 // dereference of this pointer (array subscript, section, dereference,
9873 // etc.), it is not an overlap.
9874 // Likewise, if one component is a base and another component is a
9875 // dereferenced pointer MemberExpr with the same base.
9876 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9877 (std::prev(It)->getAssociatedDeclaration() &&
9878 std::prev(It)
9879 ->getAssociatedDeclaration()
9880 ->getType()
9881 ->isPointerType()) ||
9882 (It->getAssociatedDeclaration() &&
9883 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9884 std::next(It) != CE && std::next(It) != SE))
9885 continue;
9886 const MapData &BaseData = CI == CE ? L : L1;
9887 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9888 SI == SE ? Components : Components1;
9889 OverlappedData[&BaseData].push_back(SubData);
9890 }
9891 }
9892 }
9893 // Sort the overlapped elements for each item.
9894 llvm::SmallVector<const FieldDecl *, 4> Layout;
9895 if (!OverlappedData.empty()) {
9896 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9897 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9898 while (BaseType != OrigType) {
9899 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9900 OrigType = BaseType->getPointeeOrArrayElementType();
9901 }
9902
9903 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9904 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9905 else {
9906 const auto *RD = BaseType->getAsRecordDecl();
9907 Layout.append(RD->field_begin(), RD->field_end());
9908 }
9909 }
9910 for (auto &Pair : OverlappedData) {
9911 llvm::stable_sort(
9912 Pair.getSecond(),
9913 [&Layout](
9914 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9915 OMPClauseMappableExprCommon::MappableExprComponentListRef
9916 Second) {
9917 auto CI = First.rbegin();
9918 auto CE = First.rend();
9919 auto SI = Second.rbegin();
9920 auto SE = Second.rend();
9921 for (; CI != CE && SI != SE; ++CI, ++SI) {
9922 if (CI->getAssociatedExpression()->getStmtClass() !=
9923 SI->getAssociatedExpression()->getStmtClass())
9924 break;
9925 // Are we dealing with different variables/fields?
9926 if (CI->getAssociatedDeclaration() !=
9927 SI->getAssociatedDeclaration())
9928 break;
9929 }
9930
9931 // Lists contain the same elements.
9932 if (CI == CE && SI == SE)
9933 return false;
9934
9935 // A list with fewer elements is less than a list with more elements.
9936 if (CI == CE || SI == SE)
9937 return CI == CE;
9938
9939 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9940 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9941 if (FD1->getParent() == FD2->getParent())
9942 return FD1->getFieldIndex() < FD2->getFieldIndex();
9943 const auto *It =
9944 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9945 return FD == FD1 || FD == FD2;
9946 });
9947 return *It == FD1;
9948 });
9949 }
9950
9951 // Associated with a capture, because the mapping flags depend on it.
9952 // First, go through all of the elements that have overlapped elements.
9953 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
9954 MapCombinedInfoTy StructBaseCombinedInfo;
9955 for (const auto &Pair : OverlappedData) {
9956 const MapData &L = *Pair.getFirst();
9957 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9958 OpenMPMapClauseKind MapType;
9959 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9960 bool IsImplicit;
9961 const ValueDecl *Mapper;
9962 const Expr *VarRef;
9963 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9964 L;
9965 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9966 OverlappedComponents = Pair.getSecond();
9967 generateInfoForComponentList(
9968 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9969 StructBaseCombinedInfo, PartialStruct, AttachInfo, AddTargetParamFlag,
9970 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
9971 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9972 AddTargetParamFlag = false;
9973 }
9974 // Go through other elements without overlapped elements.
9975 for (const MapData &L : DeclComponentLists) {
9976 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9977 OpenMPMapClauseKind MapType;
9978 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9979 bool IsImplicit;
9980 const ValueDecl *Mapper;
9981 const Expr *VarRef;
9982 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9983 L;
9984 auto It = OverlappedData.find(&L);
9985 if (It == OverlappedData.end())
9986 generateInfoForComponentList(
9987 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9988 StructBaseCombinedInfo, PartialStruct, AttachInfo,
9989 AddTargetParamFlag, IsImplicit, /*GenerateAllInfoForClauses*/ false,
9990 Mapper, /*ForDeviceAddr=*/false, VD, VarRef,
9991 /*OverlappedElements*/ {});
9992 AddTargetParamFlag = false;
9993 }
9994 }
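// [Editor's note on the function above, not part of the original source]
// Illustration: for map(s) map(s.x), the two component lists overlap with
// map(s) as the base; s is emitted around the bytes covered by s.x (the
// overlapped element), and s.x is emitted by its own entry, so the two can
// carry different map types.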
9995
9996 /// Check if a variable should be treated as firstprivate due to an explicit
9997 /// firstprivate clause or defaultmap(firstprivate:...).
9998 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
9999 // Check explicit firstprivate clauses (not implicit from defaultmap)
10000 auto I = FirstPrivateDecls.find(VD);
10001 if (I != FirstPrivateDecls.end() && !I->getSecond())
10002 return true; // Explicit firstprivate only
10003
10004 // Check defaultmap(firstprivate:scalar) for scalar types
10005 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_scalar)) {
10006 if (Type->isScalarType())
10007 return true;
10008 }
10009
10010 // Check defaultmap(firstprivate:pointer) for pointer types
10011 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_pointer)) {
10012 if (Type->isAnyPointerType())
10013 return true;
10014 }
10015
10016 // Check defaultmap(firstprivate:aggregate) for aggregate types
10017 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_aggregate)) {
10018 if (Type->isAggregateType())
10019 return true;
10020 }
10021
10022 // Check defaultmap(firstprivate:all) for all types
10023 return DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_all);
10024 }
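 // [Editor's illustration of isEffectivelyFirstprivate above, not part of
 // the original source] Assuming:
 //   int x;
 //   #pragma omp target defaultmap(firstprivate: scalar)
 // with no explicit data-sharing clause for x, this returns true for x via
 // the OMPC_DEFAULTMAP_scalar check; an explicit firstprivate(x) would make
 // it true regardless of defaultmap.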
10025
10026 /// Generate the default map information for a given capture \a CI,
10027 /// record field declaration \a RI and captured value \a CV.
10028 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
10029 const FieldDecl &RI, llvm::Value *CV,
10030 MapCombinedInfoTy &CombinedInfo) const {
10031 bool IsImplicit = true;
10032 // Do the default mapping.
10033 if (CI.capturesThis()) {
10034 CombinedInfo.Exprs.push_back(nullptr);
10035 CombinedInfo.BasePointers.push_back(CV);
10036 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10037 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10038 CombinedInfo.Pointers.push_back(CV);
10039 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
10040 CombinedInfo.Sizes.push_back(
10041 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
10042 CGF.Int64Ty, /*isSigned=*/true));
10043 // Default map type.
10044 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
10045 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
10046 } else if (CI.capturesVariableByCopy()) {
10047 const VarDecl *VD = CI.getCapturedVar();
10048 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
10049 CombinedInfo.BasePointers.push_back(CV);
10050 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10051 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10052 CombinedInfo.Pointers.push_back(CV);
10053 bool IsFirstprivate =
10054 isEffectivelyFirstprivate(VD, RI.getType().getNonReferenceType());
10055
10056 if (!RI.getType()->isAnyPointerType()) {
10057 // We have to signal to the runtime which captures are passed by value
10058 // and are not pointers.
10059 CombinedInfo.Types.push_back(
10060 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10061 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10062 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
10063 } else if (IsFirstprivate) {
10064 // Firstprivate pointers should be passed by value (as literals)
10065 // without performing a present table lookup at runtime.
10066 CombinedInfo.Types.push_back(
10067 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10068 // Use zero size for pointer literals (just passing the pointer value)
10069 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10070 } else {
10071 // Pointers are implicitly mapped with a zero size and no flags
10072 // (other than first map that is added for all implicit maps).
10073 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
10074 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10075 }
10076 auto I = FirstPrivateDecls.find(VD);
10077 if (I != FirstPrivateDecls.end())
10078 IsImplicit = I->getSecond();
10079 } else {
10080 assert(CI.capturesVariable() && "Expected captured reference.");
10081 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
10082 QualType ElementType = PtrTy->getPointeeType();
10083 const VarDecl *VD = CI.getCapturedVar();
10084 bool IsFirstprivate = isEffectivelyFirstprivate(VD, ElementType);
10085 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
10086 CombinedInfo.BasePointers.push_back(CV);
10087 CombinedInfo.DevicePtrDecls.push_back(nullptr);
10088 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
10089
10090 // For firstprivate pointers, pass by value instead of dereferencing
10091 if (IsFirstprivate && ElementType->isAnyPointerType()) {
10092 // Treat as a literal value (pass the pointer value itself)
10093 CombinedInfo.Pointers.push_back(CV);
10094 // Use zero size for pointer literals
10095 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
10096 CombinedInfo.Types.push_back(
10097 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
10098 } else {
10099 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10100 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
10101 // The default map type for a scalar/complex type is 'to' because by
10102 // default the value doesn't have to be retrieved. For an aggregate
10103 // type, the default is 'tofrom'.
10104 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
10105 CombinedInfo.Pointers.push_back(CV);
10106 }
10107 auto I = FirstPrivateDecls.find(VD);
10108 if (I != FirstPrivateDecls.end())
10109 IsImplicit = I->getSecond();
10110 }
10111 // Every default map produces a single argument which is a target parameter.
10112 CombinedInfo.Types.back() |=
10113 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
10114
10115 // Add flag stating this is an implicit map.
10116 if (IsImplicit)
10117 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
10118
10119 // No user-defined mapper for default mapping.
10120 CombinedInfo.Mappers.push_back(nullptr);
10121 }
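// [Editor's note on generateDefaultMapInfo above, not part of the original
// source] Illustration, per the comments in the function: an implicitly
// captured scalar `int x` (captured by reference, no clauses) gets a
// TO | TARGET_PARAM | IMPLICIT entry of size sizeof(int), while an implicit
// `this` capture gets TO | FROM | TARGET_PARAM | IMPLICIT with the size of
// the enclosing class.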
10122};
10123} // anonymous namespace
10124
10125 // Try to extract the base declaration from a `this->x` expression if possible.
10126 static const ValueDecl *getDeclFromThisExpr(const Expr *E) {
10127 if (!E)
10128 return nullptr;
10129
10130 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
10131 if (const MemberExpr *ME =
10132 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
10133 return ME->getMemberDecl();
10134 return nullptr;
10135}
10136
10137/// Emit a string constant containing the names of the values mapped to the
10138/// offloading runtime library.
10139static llvm::Constant *
10140emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
10141 MappableExprsHandler::MappingExprInfo &MapExprs) {
10142
10143 uint32_t SrcLocStrSize;
10144 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
10145 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
10146
10147 SourceLocation Loc;
10148 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
10149 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
10150 Loc = VD->getLocation();
10151 else
10152 Loc = MapExprs.getMapExpr()->getExprLoc();
10153 } else {
10154 Loc = MapExprs.getMapDecl()->getLocation();
10155 }
10156
10157 std::string ExprName;
10158 if (MapExprs.getMapExpr()) {
10159 PrintingPolicy P(CGF.getContext().getLangOpts());
10160 llvm::raw_string_ostream OS(ExprName);
10161 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
10162 } else {
10163 ExprName = MapExprs.getMapDecl()->getNameAsString();
10164 }
10165
10166 std::string FileName;
10167 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
10168 if (auto *DbgInfo = CGF.getDebugInfo())
10169 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
10170 else
10171 FileName = PLoc.getFilename();
10172 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
10173 PLoc.getColumn(), SrcLocStrSize);
10174}
10175/// Emit the arrays used to pass the captures and map information to the
10176/// offloading runtime library. If there is no map or capture information,
10177/// return nullptr by reference.
10178static void emitOffloadingArraysAndArgs(
10179 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10180 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
10181 bool IsNonContiguous = false, bool ForEndCall = false) {
10182 CodeGenModule &CGM = CGF.CGM;
10183
10184 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10185 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10186 CGF.AllocaInsertPt->getIterator());
10187 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10188 CGF.Builder.GetInsertPoint());
10189
10190 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10191 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10192 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10193 }
10194 };
10195
10196 auto CustomMapperCB = [&](unsigned int I) {
10197 llvm::Function *MFunc = nullptr;
10198 if (CombinedInfo.Mappers[I]) {
10199 Info.HasMapper = true;
10200 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10201 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10202 }
10203 return MFunc;
10204 };
10205 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
10206 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
10207 IsNonContiguous, ForEndCall, DeviceAddrCB));
10208}
10209
10210/// Check for inner distribute directive.
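/// For example, for '#pragma omp target' whose region consists of a single
/// '#pragma omp teams distribute' statement, this returns that nested
/// distribute directive; if no suitable nested directive exists, it returns
/// nullptr.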
10211static const OMPExecutableDirective *
10212getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
10213 const auto *CS = D.getInnermostCapturedStmt();
10214 const auto *Body =
10215 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
10216 const Stmt *ChildStmt =
10217 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
10218
10219 if (const auto *NestedDir =
10220 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
10221 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
10222 switch (D.getDirectiveKind()) {
10223 case OMPD_target:
10224 // For now, treat 'target' with nested 'teams loop' as if it's
10225 // distributed (target teams distribute).
10226 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
10227 return NestedDir;
10228 if (DKind == OMPD_teams) {
10229 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
10230 /*IgnoreCaptured=*/true);
10231 if (!Body)
10232 return nullptr;
10233 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
10234 if (const auto *NND =
10235 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
10236 DKind = NND->getDirectiveKind();
10237 if (isOpenMPDistributeDirective(DKind))
10238 return NND;
10239 }
10240 }
10241 return nullptr;
10242 case OMPD_target_teams:
10243 if (isOpenMPDistributeDirective(DKind))
10244 return NestedDir;
10245 return nullptr;
10246 case OMPD_target_parallel:
10247 case OMPD_target_simd:
10248 case OMPD_target_parallel_for:
10249 case OMPD_target_parallel_for_simd:
10250 return nullptr;
10251 case OMPD_target_teams_distribute:
10252 case OMPD_target_teams_distribute_simd:
10253 case OMPD_target_teams_distribute_parallel_for:
10254 case OMPD_target_teams_distribute_parallel_for_simd:
10255 case OMPD_parallel:
10256 case OMPD_for:
10257 case OMPD_parallel_for:
10258 case OMPD_parallel_master:
10259 case OMPD_parallel_sections:
10260 case OMPD_for_simd:
10261 case OMPD_parallel_for_simd:
10262 case OMPD_cancel:
10263 case OMPD_cancellation_point:
10264 case OMPD_ordered:
10265 case OMPD_threadprivate:
10266 case OMPD_allocate:
10267 case OMPD_task:
10268 case OMPD_simd:
10269 case OMPD_tile:
10270 case OMPD_unroll:
10271 case OMPD_sections:
10272 case OMPD_section:
10273 case OMPD_single:
10274 case OMPD_master:
10275 case OMPD_critical:
10276 case OMPD_taskyield:
10277 case OMPD_barrier:
10278 case OMPD_taskwait:
10279 case OMPD_taskgroup:
10280 case OMPD_atomic:
10281 case OMPD_flush:
10282 case OMPD_depobj:
10283 case OMPD_scan:
10284 case OMPD_teams:
10285 case OMPD_target_data:
10286 case OMPD_target_exit_data:
10287 case OMPD_target_enter_data:
10288 case OMPD_distribute:
10289 case OMPD_distribute_simd:
10290 case OMPD_distribute_parallel_for:
10291 case OMPD_distribute_parallel_for_simd:
10292 case OMPD_teams_distribute:
10293 case OMPD_teams_distribute_simd:
10294 case OMPD_teams_distribute_parallel_for:
10295 case OMPD_teams_distribute_parallel_for_simd:
10296 case OMPD_target_update:
10297 case OMPD_declare_simd:
10298 case OMPD_declare_variant:
10299 case OMPD_begin_declare_variant:
10300 case OMPD_end_declare_variant:
10301 case OMPD_declare_target:
10302 case OMPD_end_declare_target:
10303 case OMPD_declare_reduction:
10304 case OMPD_declare_mapper:
10305 case OMPD_taskloop:
10306 case OMPD_taskloop_simd:
10307 case OMPD_master_taskloop:
10308 case OMPD_master_taskloop_simd:
10309 case OMPD_parallel_master_taskloop:
10310 case OMPD_parallel_master_taskloop_simd:
10311 case OMPD_requires:
10312 case OMPD_metadirective:
10313 case OMPD_unknown:
10314 default:
10315 llvm_unreachable("Unexpected directive.");
10316 }
10317 }
10318
10319 return nullptr;
10320}
10321
10322/// Emit the user-defined mapper function. The code generation follows the
10323/// pattern in the example below.
10324/// \code
10325/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
10326/// void *base, void *begin,
10327/// int64_t size, int64_t type,
10328/// void *name = nullptr) {
10329/// // Allocate space for an array section first or add a base/begin for
10330/// // pointer dereference.
10331/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
10332/// !maptype.IsDelete)
10333/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10334/// size*sizeof(Ty), clearToFromMember(type));
10335/// // Map members.
10336/// for (unsigned i = 0; i < size; i++) {
10337/// // For each component specified by this mapper:
10338/// for (auto c : begin[i]->all_components) {
10339/// if (c.hasMapper())
10340/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
10341/// c.arg_type, c.arg_name);
10342/// else
10343/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
10344/// c.arg_begin, c.arg_size, c.arg_type,
10345/// c.arg_name);
10346/// }
10347/// }
10348/// // Delete the array section.
10349/// if (size > 1 && maptype.IsDelete)
10350/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
10351/// size*sizeof(Ty), clearToFromMember(type));
10352/// }
10353/// \endcode
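/// For illustration, a user-defined mapper declared as
/// \code
/// #pragma omp declare mapper(id : struct vec v) map(v.data[0 : v.len])
/// \endcode
/// (a hypothetical type and mapper name) is emitted as one such function, so
/// that mapping a 'struct vec' also maps the array section it owns.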
10354void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
10355 CodeGenFunction *CGF) {
10356 if (UDMMap.count(D) > 0)
10357 return;
10358 ASTContext &C = CGM.getContext();
10359 QualType Ty = D->getType();
10360 auto *MapperVarDecl =
10361 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
10362 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
10363 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
10364
10365 CodeGenFunction MapperCGF(CGM);
10366 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10367 auto PrivatizeAndGenMapInfoCB =
10368 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
10369 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10370 MapperCGF.Builder.restoreIP(CodeGenIP);
10371
10372 // Privatize the declared variable of mapper to be the current array
10373 // element.
10374 Address PtrCurrent(
10375 PtrPHI, ElemTy,
10376 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
10377 .getAlignment()
10378 .alignmentOfArrayElement(ElementSize));
10379 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
10380 Scope.addPrivate(MapperVarDecl, PtrCurrent);
10381 (void)Scope.Privatize();
10382
10383 // Get map clause information.
10384 MappableExprsHandler MEHandler(*D, MapperCGF);
10385 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
10386
10387 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10388 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
10389 };
10390 if (CGM.getCodeGenOpts().getDebugInfo() !=
10391 llvm::codegenoptions::NoDebugInfo) {
10392 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10393 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10394 FillInfoMap);
10395 }
10396
10397 return CombinedInfo;
10398 };
10399
10400 auto CustomMapperCB = [&](unsigned I) {
10401 llvm::Function *MapperFunc = nullptr;
10402 if (CombinedInfo.Mappers[I]) {
10403 // Call the corresponding mapper function.
10404 MapperFunc = getOrCreateUserDefinedMapperFunc(
10405 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10406 assert(MapperFunc && "Expect a valid mapper function is available.");
10407 }
10408 return MapperFunc;
10409 };
10410
10411 SmallString<64> TyStr;
10412 llvm::raw_svector_ostream Out(TyStr);
10413 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
10414 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
10415
10416 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
10417 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
10418 UDMMap.try_emplace(D, NewFn);
10419 if (CGF)
10420 FunctionUDMMap[CGF->CurFn].push_back(D);
10421}
10422
10423llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10424 const OMPDeclareMapperDecl *D) {
10425 auto I = UDMMap.find(D);
10426 if (I != UDMMap.end())
10427 return I->second;
10428 emitUserDefinedMapper(D);
10429 return UDMMap.lookup(D);
10430}
10431
10432llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
10433 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10434 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10435 const OMPLoopDirective &D)>
10436 SizeEmitter) {
10437 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10438 const OMPExecutableDirective *TD = &D;
10439 // Get nested teams distribute kind directive, if any. For now, treat
10440 // 'target_teams_loop' as if it's really a target_teams_distribute.
10441 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
10442 Kind != OMPD_target_teams_loop)
10443 TD = getNestedDistributeDirective(CGM.getContext(), D);
10444 if (!TD)
10445 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10446
10447 const auto *LD = cast<OMPLoopDirective>(TD);
10448 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10449 return NumIterations;
10450 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10451}
10452
10453static void
10454emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10455 const OMPExecutableDirective &D,
10456 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10457 bool RequiresOuterTask, const CapturedStmt &CS,
10458 bool OffloadingMandatory, CodeGenFunction &CGF) {
10459 if (OffloadingMandatory) {
10460 CGF.Builder.CreateUnreachable();
10461 } else {
10462 if (RequiresOuterTask) {
10463 CapturedVars.clear();
10464 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10465 }
10466 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
10467 CapturedVars);
10468 }
10469}
10470
10471static llvm::Value *emitDeviceID(
10472 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10473 CodeGenFunction &CGF) {
10474 // Emit device ID if any.
10475 llvm::Value *DeviceID;
10476 if (Device.getPointer()) {
10477 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10478 Device.getInt() == OMPC_DEVICE_device_num) &&
10479 "Expected device_num modifier.");
10480 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10481 DeviceID =
10482 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10483 } else {
10484 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10485 }
10486 return DeviceID;
10487}
10488
10489static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
10490emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF) {
10491 llvm::Value *DynGP = CGF.Builder.getInt32(0);
10492 auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10493
10494 if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
10495 CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
10496 llvm::Value *DynGPVal =
10497 CGF.EmitScalarExpr(DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
10498 DynGP = CGF.Builder.CreateIntCast(DynGPVal, CGF.Int32Ty,
10499 /*isSigned=*/false);
10500 auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
10501 switch (FallbackModifier) {
10502 case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
10503 DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10504 break;
10505 case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
10506 DynGPFallback = OMPDynGroupprivateFallbackType::Null;
10507 break;
10508 case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
10510 // This is the default for dyn_groupprivate.
10511 DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
10512 break;
10513 default:
10514 llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
10515 }
10516 } else if (auto *OMPXDynCGClause =
10517 D.getSingleClause<OMPXDynCGroupMemClause>()) {
10518 CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
10519 llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(OMPXDynCGClause->getSize(),
10520 /*IgnoreResultAssign=*/true);
10521 DynGP = CGF.Builder.CreateIntCast(DynCGMemVal, CGF.Int32Ty,
10522 /*isSigned=*/false);
10523 }
10524 return {DynGP, DynGPFallback};
10525}
10526
10527static void genMapInfoForCaptures(
10528 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10529 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10530 llvm::OpenMPIRBuilder &OMPBuilder,
10531 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
10532 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10533
10534 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10535 auto RI = CS.getCapturedRecordDecl()->field_begin();
10536 auto *CV = CapturedVars.begin();
10537 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10538 CE = CS.capture_end();
10539 CI != CE; ++CI, ++RI, ++CV) {
10540 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10541
10542 // VLA sizes are passed to the outlined region by copy and do not have map
10543 // information associated.
10544 if (CI->capturesVariableArrayType()) {
10545 CurInfo.Exprs.push_back(nullptr);
10546 CurInfo.BasePointers.push_back(*CV);
10547 CurInfo.DevicePtrDecls.push_back(nullptr);
10548 CurInfo.DevicePointers.push_back(
10549 MappableExprsHandler::DeviceInfoTy::None);
10550 CurInfo.Pointers.push_back(*CV);
10551 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10552 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10553 // Copy to the device as an argument. No need to retrieve it.
10554 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10555 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10556 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10557 CurInfo.Mappers.push_back(nullptr);
10558 } else {
10559 const ValueDecl *CapturedVD =
10560 CI->capturesThis() ? nullptr
10561 : CI->getCapturedVar()->getCanonicalDecl();
10562 bool HasEntryWithCVAsAttachPtr = false;
10563 if (CapturedVD)
10564 HasEntryWithCVAsAttachPtr =
10565 MEHandler.hasAttachEntryForCapturedVar(CapturedVD);
10566
10567 // Populate component lists for the captured variable from clauses.
10568 MappableExprsHandler::MapDataArrayTy DeclComponentLists;
10571 StorageForImplicitlyAddedComponentLists;
10572 MEHandler.populateComponentListsForNonLambdaCaptureFromClauses(
10573 CapturedVD, DeclComponentLists,
10574 StorageForImplicitlyAddedComponentLists);
10575
10576 // OpenMP 6.0, 15.8, target construct, restrictions:
10577 // * A list item in a map clause that is specified on a target construct
10578 // must have a base variable or base pointer.
10579 //
10580 // Map clauses on a target construct must have either a base pointer or a
10581 // base variable. So, if we don't have a base pointer, the list item must
10582 // have a base variable, i.e. we have a map like `map(s)`, `map(s.x)`,
10583 // etc. In such cases, we do not need to handle default map generation
10584 // for `s`.
10585 bool HasEntryWithoutAttachPtr =
10586 llvm::any_of(DeclComponentLists, [&](const auto &MapData) {
10588 Components = std::get<0>(MapData);
10589 return !MEHandler.getAttachPtrExpr(Components);
10590 });
10591
10592 // Generate default map info first if there's no direct map with CV as
10593 // the base-variable, or attach pointer.
10594 if (DeclComponentLists.empty() ||
10595 (!HasEntryWithCVAsAttachPtr && !HasEntryWithoutAttachPtr))
10596 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10597
10598 // If we have any information in the map clause, we use it, otherwise we
10599 // just do a default mapping.
10600 MEHandler.generateInfoForCaptureFromClauseInfo(
10601 DeclComponentLists, CI, *CV, CurInfo, OMPBuilder,
10602 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
10603
10604 if (!CI->capturesThis())
10605 MappedVarSet.insert(CI->getCapturedVar());
10606 else
10607 MappedVarSet.insert(nullptr);
10608
10609 // Generate correct mapping for variables captured by reference in
10610 // lambdas.
10611 if (CI->capturesVariable())
10612 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10613 CurInfo, LambdaPointers);
10614 }
10615 // We expect to have at least one element of information for this capture.
10616 assert(!CurInfo.BasePointers.empty() &&
10617 "Non-existing map pointer for capture!");
10618 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10619 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10620 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10621 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10622 "Inconsistent map information sizes!");
10623
10624 // We need to append the results of this capture to what we already have.
10625 CombinedInfo.append(CurInfo);
10626 }
10627 // Adjust MEMBER_OF flags for the lambdas captures.
10628 MEHandler.adjustMemberOfForLambdaCaptures(
10629 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
10630 CombinedInfo.Pointers, CombinedInfo.Types);
10631}
10632static void
10633genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10634 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10635 llvm::OpenMPIRBuilder &OMPBuilder,
10636 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10637 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10638
10639 CodeGenModule &CGM = CGF.CGM;
10640 // Map any list items in a map clause that were not captured because they
10641 // weren't referenced within the construct.
10642 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
10643
10644 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10645 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10646 };
10647 if (CGM.getCodeGenOpts().getDebugInfo() !=
10648 llvm::codegenoptions::NoDebugInfo) {
10649 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10650 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10651 FillInfoMap);
10652 }
10653}
10654
10655static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
10656 const CapturedStmt &CS,
10657 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10658 llvm::OpenMPIRBuilder &OMPBuilder,
10659 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10660 // Get mappable expression information.
10661 MappableExprsHandler MEHandler(D, CGF);
10662 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10663
10664 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10665 MappedVarSet, CombinedInfo);
10666 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
10667}
10668
10669template <typename ClauseTy>
10670static void
10671emitClauseForBareTargetDirective(CodeGenFunction &CGF,
10672 const OMPExecutableDirective &D,
10673 llvm::SmallVectorImpl<llvm::Value *> &Values) {
10674 const auto *C = D.getSingleClause<ClauseTy>();
10675 assert(!C->varlist_empty() &&
10676 "ompx_bare requires explicit num_teams and thread_limit");
10677 CodeGenFunction::RunCleanupsScope Scope(CGF);
10678 for (auto *E : C->varlist()) {
10679 llvm::Value *V = CGF.EmitScalarExpr(E);
10680 Values.push_back(
10681 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
10682 }
10683}
10684
10685static void emitTargetCallKernelLaunch(
10686 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10687 const OMPExecutableDirective &D,
10688 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10689 const CapturedStmt &CS, bool OffloadingMandatory,
10690 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10691 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10692 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10693 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10694 const OMPLoopDirective &D)>
10695 SizeEmitter,
10696 CodeGenFunction &CGF, CodeGenModule &CGM) {
10697 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10698
10699 // Fill up the arrays with all the captured variables.
10700 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10701 CGOpenMPRuntime::TargetDataInfo Info;
10702 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10703
10704 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10705 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10706
10707 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10708 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10709 CGF.VoidPtrTy, CGM.getPointerAlign());
10710 InputInfo.PointersArray =
10711 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10712 InputInfo.SizesArray =
10713 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10714 InputInfo.MappersArray =
10715 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10716 MapTypesArray = Info.RTArgs.MapTypesArray;
10717 MapNamesArray = Info.RTArgs.MapNamesArray;
10718
10719 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10720 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10721 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10722 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10723 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10724
10725 if (IsReverseOffloading) {
10726 // Reverse offloading is not supported, so just execute on the host.
10727 // FIXME: This fallback solution is incorrect since it ignores the
10728 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10729 // assert here and ensure SEMA emits an error.
10730 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10731 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10732 return;
10733 }
10734
10735 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10736 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10737
10738 llvm::Value *BasePointersArray =
10739 InputInfo.BasePointersArray.emitRawPointer(CGF);
10740 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10741 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10742 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10743
10744 auto &&EmitTargetCallFallbackCB =
10745 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10746 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10747 -> llvm::OpenMPIRBuilder::InsertPointTy {
10748 CGF.Builder.restoreIP(IP);
10749 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10750 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10751 return CGF.Builder.saveIP();
10752 };
10753
10754 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10755 SmallVector<llvm::Value *, 3> NumTeams;
10756 SmallVector<llvm::Value *, 3> NumThreads;
10757 if (IsBare) {
10758 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
10759 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
10760 NumThreads);
10761 } else {
10762 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10763 NumThreads.push_back(
10764 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10765 }
10766
10767 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10768 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
10769 llvm::Value *NumIterations =
10770 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10771 auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
10772 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10773 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10774
10775 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10776 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10777 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10778
10779 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
10780 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
10781 DynCGroupMem, HasNoWait, DynCGroupMemFallback);
10782
10783 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10784 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
10785 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
10786 RTLoc, AllocaIP));
10787 CGF.Builder.restoreIP(AfterIP);
10788 };
10789
10790 if (RequiresOuterTask)
10791 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10792 else
10793 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10794}
10795
10796static void
10797emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10798 const OMPExecutableDirective &D,
10799 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10800 bool RequiresOuterTask, const CapturedStmt &CS,
10801 bool OffloadingMandatory, CodeGenFunction &CGF) {
10802
10803 // Notify that the host version must be executed.
10804 auto &&ElseGen =
10805 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10806 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10807 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10808 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10809 };
10810
10811 if (RequiresOuterTask) {
10812 CodeGenFunction::OMPTargetDataInfo InputInfo;
10813 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10814 } else {
10815 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10816 }
10817}
10818
10819void CGOpenMPRuntime::emitTargetCall(
10820 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10821 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10822 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10823 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10824 const OMPLoopDirective &D)>
10825 SizeEmitter) {
10826 if (!CGF.HaveInsertPoint())
10827 return;
10828
10829 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
10830 CGM.getLangOpts().OpenMPOffloadMandatory;
10831
10832 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10833
10834 const bool RequiresOuterTask =
10835 D.hasClausesOfKind<OMPDependClause>() ||
10836 D.hasClausesOfKind<OMPNowaitClause>() ||
10837 D.hasClausesOfKind<OMPInReductionClause>() ||
10838 (CGM.getLangOpts().OpenMP >= 51 &&
10839 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
10840 D.hasClausesOfKind<OMPThreadLimitClause>());
10841 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10842 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10843 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10844 PrePostActionTy &) {
10845 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10846 };
10847 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10848
10849 CodeGenFunction::OMPTargetDataInfo InputInfo;
10850 llvm::Value *MapTypesArray = nullptr;
10851 llvm::Value *MapNamesArray = nullptr;
10852
10853 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
10854 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10855 OutlinedFnID, &InputInfo, &MapTypesArray,
10856 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
10857 PrePostActionTy &) {
10858 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
10859 RequiresOuterTask, CS, OffloadingMandatory,
10860 Device, OutlinedFnID, InputInfo, MapTypesArray,
10861 MapNamesArray, SizeEmitter, CGF, CGM);
10862 };
10863
10864 auto &&TargetElseGen =
10865 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10866 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10867 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
10868 CS, OffloadingMandatory, CGF);
10869 };
10870
10871 // If we have a target function ID it means that we need to support
10872 // offloading; otherwise, just execute on the host. We need to execute on
10873 // the host regardless of the conditional in the if clause if, e.g., the
10874 // user does not specify target triples.
10875 if (OutlinedFnID) {
10876 if (IfCond) {
10877 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10878 } else {
10879 RegionCodeGenTy ThenRCG(TargetThenGen);
10880 ThenRCG(CGF);
10881 }
10882 } else {
10883 RegionCodeGenTy ElseRCG(TargetElseGen);
10884 ElseRCG(CGF);
10885 }
10886}
10887
10888void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10889 StringRef ParentName) {
10890 if (!S)
10891 return;
10892
10893 // Codegen OMP target directives that offload compute to the device.
10894 bool RequiresDeviceCodegen =
10895 isa<OMPExecutableDirective>(S) &&
10896 isOpenMPTargetExecutionDirective(
10897 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10898
10899 if (RequiresDeviceCodegen) {
10900 const auto &E = *cast<OMPExecutableDirective>(S);
10901
10902 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
10903 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
10904
10905 // Is this a target region that should not be emitted as an entry point? If
10906 // so, just signal that we are done with this target region.
10907 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
10908 return;
10909
10910 switch (E.getDirectiveKind()) {
10911 case OMPD_target:
10912 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10913 cast<OMPTargetDirective>(E));
10914 break;
10915 case OMPD_target_parallel:
10916 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10917 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10918 break;
10919 case OMPD_target_teams:
10920 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10921 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10922 break;
10923 case OMPD_target_teams_distribute:
10924 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10925 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10926 break;
10927 case OMPD_target_teams_distribute_simd:
10928 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10929 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10930 break;
10931 case OMPD_target_parallel_for:
10932 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10933 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10934 break;
10935 case OMPD_target_parallel_for_simd:
10936 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10937 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10938 break;
10939 case OMPD_target_simd:
10940 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10941 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10942 break;
10943 case OMPD_target_teams_distribute_parallel_for:
10944 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10945 CGM, ParentName,
10946 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10947 break;
10948 case OMPD_target_teams_distribute_parallel_for_simd:
10949 CodeGenFunction::
10950 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10951 CGM, ParentName,
10952 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10953 break;
10954 case OMPD_target_teams_loop:
10955 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
10956 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
10957 break;
10958 case OMPD_target_parallel_loop:
10959 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
10960 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
10961 break;
10962 case OMPD_parallel:
10963 case OMPD_for:
10964 case OMPD_parallel_for:
10965 case OMPD_parallel_master:
10966 case OMPD_parallel_sections:
10967 case OMPD_for_simd:
10968 case OMPD_parallel_for_simd:
10969 case OMPD_cancel:
10970 case OMPD_cancellation_point:
10971 case OMPD_ordered:
10972 case OMPD_threadprivate:
10973 case OMPD_allocate:
10974 case OMPD_task:
10975 case OMPD_simd:
10976 case OMPD_tile:
10977 case OMPD_unroll:
10978 case OMPD_sections:
10979 case OMPD_section:
10980 case OMPD_single:
10981 case OMPD_master:
10982 case OMPD_critical:
10983 case OMPD_taskyield:
10984 case OMPD_barrier:
10985 case OMPD_taskwait:
10986 case OMPD_taskgroup:
10987 case OMPD_atomic:
10988 case OMPD_flush:
10989 case OMPD_depobj:
10990 case OMPD_scan:
10991 case OMPD_teams:
10992 case OMPD_target_data:
10993 case OMPD_target_exit_data:
10994 case OMPD_target_enter_data:
10995 case OMPD_distribute:
10996 case OMPD_distribute_simd:
10997 case OMPD_distribute_parallel_for:
10998 case OMPD_distribute_parallel_for_simd:
10999 case OMPD_teams_distribute:
11000 case OMPD_teams_distribute_simd:
11001 case OMPD_teams_distribute_parallel_for:
11002 case OMPD_teams_distribute_parallel_for_simd:
11003 case OMPD_target_update:
11004 case OMPD_declare_simd:
11005 case OMPD_declare_variant:
11006 case OMPD_begin_declare_variant:
11007 case OMPD_end_declare_variant:
11008 case OMPD_declare_target:
11009 case OMPD_end_declare_target:
11010 case OMPD_declare_reduction:
11011 case OMPD_declare_mapper:
11012 case OMPD_taskloop:
11013 case OMPD_taskloop_simd:
11014 case OMPD_master_taskloop:
11015 case OMPD_master_taskloop_simd:
11016 case OMPD_parallel_master_taskloop:
11017 case OMPD_parallel_master_taskloop_simd:
11018 case OMPD_requires:
11019 case OMPD_metadirective:
11020 case OMPD_unknown:
11021 default:
11022 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
11023 }
11024 return;
11025 }
11026
11027 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
11028 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
11029 return;
11030
11031 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
11032 return;
11033 }
11034
11035 // If this is a lambda function, look into its body.
11036 if (const auto *L = dyn_cast<LambdaExpr>(S))
11037 S = L->getBody();
11038
11039 // Keep looking for target regions recursively.
11040 for (const Stmt *II : S->children())
11041 scanForTargetRegionsFunctions(II, ParentName);
11042}
11043
11044static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
11045 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
11046 OMPDeclareTargetDeclAttr::getDeviceType(VD);
11047 if (!DevTy)
11048 return false;
11049 // Do not emit device_type(nohost) functions for the host.
11050 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
11051 return true;
11052 // Do not emit device_type(host) functions for the device.
11053 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
11054 return true;
11055 return false;
11056}
11057
11058bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
11059 // If emitting code for the host, we do not process FD here. Instead we do
11060 // the normal code generation.
11061 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
11062 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
11063 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
11064 CGM.getLangOpts().OpenMPIsTargetDevice))
11065 return true;
11066 return false;
11067 }
11068
11069 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
11070 // Try to detect target regions in the function.
11071 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
11072 StringRef Name = CGM.getMangledName(GD);
11073 scanForTargetRegionsFunctions(FD->getBody(), Name);
11074 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
11075 CGM.getLangOpts().OpenMPIsTargetDevice))
11076 return true;
11077 }
11078
11079 // Do not emit the function if it is not marked as declare target.
11080 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
11081 AlreadyEmittedTargetDecls.count(VD) == 0;
11082}
11083
11086 CGM.getLangOpts().OpenMPIsTargetDevice))
11087 return true;
11088
11089 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
11090 return false;
11091
11092 // Check if there are Ctors/Dtors in this declaration and look for target
11093 // regions in it. We use the complete variant to produce the kernel name
11094 // mangling.
11095 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
11096 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
11097 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
11098 StringRef ParentName =
11099 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
11100 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
11101 }
11102 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
11103 StringRef ParentName =
11104 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
11105 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
11106 }
11107 }
11108
11109 // Do not emit the variable if it is not marked as declare target.
11110 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11111 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
11112 cast<VarDecl>(GD.getDecl()));
11113 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
11114 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11115 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11118 return true;
11119 }
11120 return false;
11121}
11122
11123void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
11124 llvm::Constant *Addr) {
11125 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
11126 !CGM.getLangOpts().OpenMPIsTargetDevice)
11127 return;
11128
11129 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11130 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11131
11132 // If this is an 'extern' declaration we defer to the canonical definition and
11133 // do not emit an offloading entry.
11134 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
11135 VD->hasExternalStorage())
11136 return;
11137
11138 if (!Res) {
11139 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11140 // Register non-target variables being emitted in device code (debug info
11141 // may cause this).
11142 StringRef VarName = CGM.getMangledName(VD);
11143 EmittedNonTargetVariables.try_emplace(VarName, Addr);
11144 }
11145 return;
11146 }
11147
11148 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
11149 auto LinkageForVariable = [&VD, this]() {
11150 return CGM.getLLVMLinkageVarDefinition(VD);
11151 };
11152
11153 std::vector<llvm::GlobalVariable *> GeneratedRefs;
11154 OMPBuilder.registerTargetGlobalVariable(
11155 convertCaptureClause(VD), convertDeviceClause(VD),
11156 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
11157 VD->isExternallyVisible(),
11158 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
11159 VD->getCanonicalDecl()->getBeginLoc()),
11160 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
11161 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
11162 CGM.getTypes().ConvertTypeForMem(
11163 CGM.getContext().getPointerType(VD->getType())),
11164 Addr);
11165
11166 for (auto *ref : GeneratedRefs)
11167 CGM.addCompilerUsedGlobal(ref);
11168}
11169
11170bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
11171 if (isa<FunctionDecl>(GD.getDecl()) ||
11172 isa<OMPDeclareReductionDecl>(GD.getDecl()))
11173 return emitTargetFunctions(GD);
11174
11175 return emitTargetGlobalVariable(GD);
11176}
11177
11178void CGOpenMPRuntime::emitDeferredTargetDecls() const {
11179 for (const VarDecl *VD : DeferredGlobalVariables) {
11180 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
11181 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
11182 if (!Res)
11183 continue;
11184 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11185 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11187 CGM.EmitGlobal(VD);
11188 } else {
11189 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
11190 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
11191 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
11193 "Expected link clause or to clause with unified memory.");
11194 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
11195 }
11196 }
11197}
11198
11199void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
11200 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
11201 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
11202 "Expected target-based directive.");
11203}
11204
11205void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
11206 for (const OMPClause *Clause : D->clauselists()) {
11207 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
11209 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
11210 } else if (const auto *AC =
11211 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
11212 switch (AC->getAtomicDefaultMemOrderKind()) {
11213 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
11214 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
11215 break;
11216 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
11217 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
11218 break;
11219 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
11220 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
11221 break;
11222 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11223 break;
11224 }
11225 }
11226 }
11227}
11228
11229llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11230 return RequiresAtomicOrdering;
11231}
11232
11233bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11234 LangAS &AS) {
11235 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11236 return false;
11237 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11238 switch(A->getAllocatorType()) {
11239 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11240 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11241 // Not supported, fallback to the default mem space.
11242 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11243 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11244 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11245 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11246 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11247 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11248 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11249 AS = LangAS::Default;
11250 return true;
11251 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11252 llvm_unreachable("Expected predefined allocator for the variables with the "
11253 "static storage.");
11254 }
11255 return false;
11256}
11257
11261
11262CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11263 CodeGenModule &CGM)
11264 : CGM(CGM) {
11265 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
11266 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11267 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11268 }
11269}
11270
11271CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11272 if (CGM.getLangOpts().OpenMPIsTargetDevice)
11273 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11274}
11275
11276bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11277 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
11278 return true;
11279
11280 const auto *D = cast<FunctionDecl>(GD.getDecl());
11281 // Do not emit the function if it is marked as declare target, as it was
11282 // already emitted.
11283 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11284 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11285 if (auto *F = dyn_cast_or_null<llvm::Function>(
11286 CGM.GetGlobalValue(CGM.getMangledName(GD))))
11287 return !F->isDeclaration();
11288 return false;
11289 }
11290 return true;
11291 }
11292
11293 return !AlreadyEmittedTargetDecls.insert(D).second;
11294}
11295
11296void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11297 const OMPExecutableDirective &D,
11298 SourceLocation Loc,
11299 llvm::Function *OutlinedFn,
11300 ArrayRef<llvm::Value *> CapturedVars) {
11301 if (!CGF.HaveInsertPoint())
11302 return;
11303
11304 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11305 CodeGenFunction::RunCleanupsScope Scope(CGF);
11306
11307 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11308 llvm::Value *Args[] = {
11309 RTLoc,
11310 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11311 OutlinedFn};
11312 llvm::SmallVector<llvm::Value *, 16> RealArgs;
11313 RealArgs.append(std::begin(Args), std::end(Args));
11314 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11315
11316 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11317 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11318 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11319}
11320
11321void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11322 const Expr *NumTeams,
11323 const Expr *ThreadLimit,
11324 SourceLocation Loc) {
11325 if (!CGF.HaveInsertPoint())
11326 return;
11327
11328 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11329
11330 llvm::Value *NumTeamsVal =
11331 NumTeams
11332 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11333 CGF.CGM.Int32Ty, /* isSigned = */ true)
11334 : CGF.Builder.getInt32(0);
11335
11336 llvm::Value *ThreadLimitVal =
11337 ThreadLimit
11338 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11339 CGF.CGM.Int32Ty, /* isSigned = */ true)
11340 : CGF.Builder.getInt32(0);
11341
11342 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
11343 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11344 ThreadLimitVal};
11345 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11346 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11347 PushNumTeamsArgs);
11348}
11349
11350void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
11351 const Expr *ThreadLimit,
11352 SourceLocation Loc) {
11353 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11354 llvm::Value *ThreadLimitVal =
11355 ThreadLimit
11356 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11357 CGF.CGM.Int32Ty, /* isSigned = */ true)
11358 : CGF.Builder.getInt32(0);
11359
11360 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
11361 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
11362 ThreadLimitVal};
11363 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11364 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
11365 ThreadLimitArgs);
11366}
11367
11368void CGOpenMPRuntime::emitTargetDataCalls(
11369 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11370 const Expr *Device, const RegionCodeGenTy &CodeGen,
11371 CGOpenMPRuntime::TargetDataInfo &Info) {
11372 if (!CGF.HaveInsertPoint())
11373 return;
11374
11375 // Action used to replace the default codegen action and turn privatization
11376 // off.
11377 PrePostActionTy NoPrivAction;
11378
11379 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
11380
11381 llvm::Value *IfCondVal = nullptr;
11382 if (IfCond)
11383 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
11384
11385 // Emit device ID if any.
11386 llvm::Value *DeviceID = nullptr;
11387 if (Device) {
11388 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11389 CGF.Int64Ty, /*isSigned=*/true);
11390 } else {
11391 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11392 }
11393
11394 // Fill up the arrays with all the mapped variables.
11395 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11396 auto GenMapInfoCB =
11397 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
11398 CGF.Builder.restoreIP(CodeGenIP);
11399 // Get map clause information.
11400 MappableExprsHandler MEHandler(D, CGF);
11401 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
11402
11403 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
11404 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
11405 };
11406 if (CGM.getCodeGenOpts().getDebugInfo() !=
11407 llvm::codegenoptions::NoDebugInfo) {
11408 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
11409 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
11410 FillInfoMap);
11411 }
11412
11413 return CombinedInfo;
11414 };
11415 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
11416 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
11417 CGF.Builder.restoreIP(CodeGenIP);
11418 switch (BodyGenType) {
11419 case BodyGenTy::Priv:
11420 if (!Info.CaptureDeviceAddrMap.empty())
11421 CodeGen(CGF);
11422 break;
11423 case BodyGenTy::DupNoPriv:
11424 if (!Info.CaptureDeviceAddrMap.empty()) {
11425 CodeGen.setAction(NoPrivAction);
11426 CodeGen(CGF);
11427 }
11428 break;
11429 case BodyGenTy::NoPriv:
11430 if (Info.CaptureDeviceAddrMap.empty()) {
11431 CodeGen.setAction(NoPrivAction);
11432 CodeGen(CGF);
11433 }
11434 break;
11435 }
11436 return InsertPointTy(CGF.Builder.GetInsertBlock(),
11437 CGF.Builder.GetInsertPoint());
11438 };
11439
11440 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
11441 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
11442 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
11443 }
11444 };
11445
11446 auto CustomMapperCB = [&](unsigned int I) {
11447 llvm::Function *MFunc = nullptr;
11448 if (CombinedInfo.Mappers[I]) {
11449 Info.HasMapper = true;
11450 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
11451 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
11452 }
11453 return MFunc;
11454 };
11455
11456 // Source location for the ident struct
11457 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11458
11459 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
11460 CGF.AllocaInsertPt->getIterator());
11461 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
11462 CGF.Builder.GetInsertPoint());
11463 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
11464 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
11465 cantFail(OMPBuilder.createTargetData(
11466 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
11467 CustomMapperCB,
11468 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
11469 CGF.Builder.restoreIP(AfterIP);
11470}
11471
11472void CGOpenMPRuntime::emitTargetDataStandaloneCall(
11473 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11474 const Expr *Device) {
11475 if (!CGF.HaveInsertPoint())
11476 return;
11477
11478 assert((isa<OMPTargetEnterDataDirective>(D) ||
11479 isa<OMPTargetExitDataDirective>(D) ||
11480 isa<OMPTargetUpdateDirective>(D)) &&
11481 "Expecting either target enter, exit data, or update directives.");
11482
11483 CodeGenFunction::OMPTargetDataInfo InputInfo;
11484 llvm::Value *MapTypesArray = nullptr;
11485 llvm::Value *MapNamesArray = nullptr;
11486 // Generate the code for the opening of the data environment.
11487 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11488 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11489 // Emit device ID if any.
11490 llvm::Value *DeviceID = nullptr;
11491 if (Device) {
11492 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11493 CGF.Int64Ty, /*isSigned=*/true);
11494 } else {
11495 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11496 }
11497
11498 // Emit the number of elements in the offloading arrays.
11499 llvm::Constant *PointerNum =
11500 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11501
11502 // Source location for the ident struct
11503 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11504
11505 SmallVector<llvm::Value *, 13> OffloadingArgs(
11506 {RTLoc, DeviceID, PointerNum,
11507 InputInfo.BasePointersArray.emitRawPointer(CGF),
11508 InputInfo.PointersArray.emitRawPointer(CGF),
11509 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
11510 InputInfo.MappersArray.emitRawPointer(CGF)});
11511
11512 // Select the right runtime function call for each standalone
11513 // directive.
11514 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11515 RuntimeFunction RTLFn;
11516 switch (D.getDirectiveKind()) {
11517 case OMPD_target_enter_data:
11518 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11519 : OMPRTL___tgt_target_data_begin_mapper;
11520 break;
11521 case OMPD_target_exit_data:
11522 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11523 : OMPRTL___tgt_target_data_end_mapper;
11524 break;
11525 case OMPD_target_update:
11526 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11527 : OMPRTL___tgt_target_data_update_mapper;
11528 break;
11529 case OMPD_parallel:
11530 case OMPD_for:
11531 case OMPD_parallel_for:
11532 case OMPD_parallel_master:
11533 case OMPD_parallel_sections:
11534 case OMPD_for_simd:
11535 case OMPD_parallel_for_simd:
11536 case OMPD_cancel:
11537 case OMPD_cancellation_point:
11538 case OMPD_ordered:
11539 case OMPD_threadprivate:
11540 case OMPD_allocate:
11541 case OMPD_task:
11542 case OMPD_simd:
11543 case OMPD_tile:
11544 case OMPD_unroll:
11545 case OMPD_sections:
11546 case OMPD_section:
11547 case OMPD_single:
11548 case OMPD_master:
11549 case OMPD_critical:
11550 case OMPD_taskyield:
11551 case OMPD_barrier:
11552 case OMPD_taskwait:
11553 case OMPD_taskgroup:
11554 case OMPD_atomic:
11555 case OMPD_flush:
11556 case OMPD_depobj:
11557 case OMPD_scan:
11558 case OMPD_teams:
11559 case OMPD_target_data:
11560 case OMPD_distribute:
11561 case OMPD_distribute_simd:
11562 case OMPD_distribute_parallel_for:
11563 case OMPD_distribute_parallel_for_simd:
11564 case OMPD_teams_distribute:
11565 case OMPD_teams_distribute_simd:
11566 case OMPD_teams_distribute_parallel_for:
11567 case OMPD_teams_distribute_parallel_for_simd:
11568 case OMPD_declare_simd:
11569 case OMPD_declare_variant:
11570 case OMPD_begin_declare_variant:
11571 case OMPD_end_declare_variant:
11572 case OMPD_declare_target:
11573 case OMPD_end_declare_target:
11574 case OMPD_declare_reduction:
11575 case OMPD_declare_mapper:
11576 case OMPD_taskloop:
11577 case OMPD_taskloop_simd:
11578 case OMPD_master_taskloop:
11579 case OMPD_master_taskloop_simd:
11580 case OMPD_parallel_master_taskloop:
11581 case OMPD_parallel_master_taskloop_simd:
11582 case OMPD_target:
11583 case OMPD_target_simd:
11584 case OMPD_target_teams_distribute:
11585 case OMPD_target_teams_distribute_simd:
11586 case OMPD_target_teams_distribute_parallel_for:
11587 case OMPD_target_teams_distribute_parallel_for_simd:
11588 case OMPD_target_teams:
11589 case OMPD_target_parallel:
11590 case OMPD_target_parallel_for:
11591 case OMPD_target_parallel_for_simd:
11592 case OMPD_requires:
11593 case OMPD_metadirective:
11594 case OMPD_unknown:
11595 default:
11596 llvm_unreachable("Unexpected standalone target data directive.");
11597 break;
11598 }
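 // The '_nowait' entry points take four extra task-dependence arguments
 // (depNum, depList, noAliasDepNum, noAliasDepList); there are no
 // dependences here, so pass zero/null for each.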
11599 if (HasNowait) {
11600 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11601 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11602 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11603 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11604 }
11605 CGF.EmitRuntimeCall(
11606 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11607 OffloadingArgs);
11608 };
11609
11610 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11611 &MapNamesArray](CodeGenFunction &CGF,
11612 PrePostActionTy &) {
11613 // Fill up the arrays with all the mapped variables.
11614 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11615 CGOpenMPRuntime::TargetDataInfo Info;
11616 MappableExprsHandler MEHandler(D, CGF);
11617 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11618 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11619 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11620
11621 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11622 D.hasClausesOfKind<OMPNowaitClause>();
11623
11624 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11625 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11626 CGF.VoidPtrTy, CGM.getPointerAlign());
11627 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11628 CGM.getPointerAlign());
11629 InputInfo.SizesArray =
11630 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11631 InputInfo.MappersArray =
11632 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11633 MapTypesArray = Info.RTArgs.MapTypesArray;
11634 MapNamesArray = Info.RTArgs.MapNamesArray;
11635 if (RequiresOuterTask)
11636 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11637 else
11638 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11639 };
11640
11641 if (IfCond) {
11642 emitIfClause(CGF, IfCond, TargetThenGen,
11643 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11644 } else {
11645 RegionCodeGenTy ThenRCG(TargetThenGen);
11646 ThenRCG(CGF);
11647 }
11648}
11649
11650namespace {
11651 /// Kind of parameter in a function with 'declare simd' directive.
11652enum ParamKindTy {
11653 Linear,
11654 LinearRef,
11655 LinearUVal,
11656 LinearVal,
11657 Uniform,
11658 Vector,
11659};
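// For reference: 'uniform(x)' maps to Uniform, plain 'linear(x)' to Linear,
// 'linear(val(x))' to LinearVal, 'linear(uval(x))' to LinearUVal, and
// 'linear(ref(x))' to LinearRef; parameters without a clause default to
// Vector.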
11660/// Attribute set of the parameter.
11661struct ParamAttrTy {
11662 ParamKindTy Kind = Vector;
11663 llvm::APSInt StrideOrArg;
11664 llvm::APSInt Alignment;
11665 bool HasVarStride = false;
11666};
11667} // namespace
11668
11669static unsigned evaluateCDTSize(const FunctionDecl *FD,
11670 ArrayRef<ParamAttrTy> ParamAttrs) {
11671 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11672 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
11673 // argument of that clause. The VLEN value must be a power of 2.
11674 // Otherwise the notion of the function's "characteristic data type" (CDT)
11675 // is used to compute the vector length.
11676 // CDT is defined in the following order:
11677 // a) For a non-void function, the CDT is the return type.
11678 // b) If the function has any non-uniform, non-linear parameters, then the
11679 // CDT is the type of the first such parameter.
11680 // c) If the CDT determined by a) or b) above is a struct, union, or class
11681 // type which is passed by value (except for the type that maps to the
11682 // built-in complex data type), the characteristic data type is int.
11683 // d) If none of the above three cases is applicable, the CDT is int.
11684 // The VLEN is then determined based on the CDT and the size of the vector
11685 // register of the ISA for which the current vector version is generated.
11686 // The VLEN is computed using the formula below:
11687 // VLEN = sizeof(vector_register) / sizeof(CDT),
11688 // where the vector register size is specified in section 3.2.1, Registers
11689 // and the Stack Frame, of the original AMD64 ABI document.
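 // Worked example: with no 'simdlen' clause, a function returning 'double'
 // has CDT = double (64 bits), so a 256-bit vector ISA yields
 // VLEN = 256 / 64 = 4.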
11690 QualType RetType = FD->getReturnType();
11691 if (RetType.isNull())
11692 return 0;
11693 ASTContext &C = FD->getASTContext();
11694 QualType CDT;
11695 if (!RetType.isNull() && !RetType->isVoidType()) {
11696 CDT = RetType;
11697 } else {
11698 unsigned Offset = 0;
11699 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11700 if (ParamAttrs[Offset].Kind == Vector)
11701 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
11702 ++Offset;
11703 }
11704 if (CDT.isNull()) {
11705 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11706 if (ParamAttrs[I + Offset].Kind == Vector) {
11707 CDT = FD->getParamDecl(I)->getType();
11708 break;
11709 }
11710 }
11711 }
11712 }
11713 if (CDT.isNull())
11714 CDT = C.IntTy;
11715 CDT = CDT->getCanonicalTypeUnqualified();
11716 if (CDT->isRecordType() || CDT->isUnionType())
11717 CDT = C.IntTy;
11718 return C.getTypeSize(CDT);
11719}
11720
11721/// Mangle the parameter part of the vector function name according to
11722/// their OpenMP classification. The mangling function is defined in
11723/// section 4.5 of the AAVFABI(2021Q1).
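/// For example, the parameter sequence (uniform, vector, linear with step 4)
/// mangles to "uvl4", while a linear parameter with the default step of 1
/// contributes just "l".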
11724static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11725 SmallString<256> Buffer;
11726 llvm::raw_svector_ostream Out(Buffer);
11727 for (const auto &ParamAttr : ParamAttrs) {
11728 switch (ParamAttr.Kind) {
11729 case Linear:
11730 Out << 'l';
11731 break;
11732 case LinearRef:
11733 Out << 'R';
11734 break;
11735 case LinearUVal:
11736 Out << 'U';
11737 break;
11738 case LinearVal:
11739 Out << 'L';
11740 break;
11741 case Uniform:
11742 Out << 'u';
11743 break;
11744 case Vector:
11745 Out << 'v';
11746 break;
11747 }
11748 if (ParamAttr.HasVarStride)
11749 Out << "s" << ParamAttr.StrideOrArg;
11750 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11751 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11752 // Don't print the step value if it is not present or if it is
11753 // equal to 1.
11754 if (ParamAttr.StrideOrArg < 0)
11755 Out << 'n' << -ParamAttr.StrideOrArg;
11756 else if (ParamAttr.StrideOrArg != 1)
11757 Out << ParamAttr.StrideOrArg;
11758 }
11759
11760 if (!!ParamAttr.Alignment)
11761 Out << 'a' << ParamAttr.Alignment;
11762 }
11763
11764 return std::string(Out.str());
11765}
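// A worked example (editorial illustration): for
//   #pragma omp declare simd uniform(n) linear(x:2) aligned(p:16)
//   void foo(int n, int *x, double *p);
// n mangles as 'u', x as 'l' plus its step, and p as 'v' plus "a16", so
// the parameter part becomes "ul8va16" (the step 2 arrives here already
// rescaled by sizeof(int) == 4; see emitDeclareSimdFunction further down,
// which rescales pointer steps by the pointee size).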
11766
11767static void
11768emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11769 const llvm::APSInt &VLENVal,
11770 ArrayRef<ParamAttrTy> ParamAttrs,
11771 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11772 struct ISADataTy {
11773 char ISA;
11774 unsigned VecRegSize;
11775 };
11776 ISADataTy ISAData[] = {
11777 {
11778 'b', 128
11779 }, // SSE
11780 {
11781 'c', 256
11782 }, // AVX
11783 {
11784 'd', 256
11785 }, // AVX2
11786 {
11787 'e', 512
11788 }, // AVX512
11789 };
11790 llvm::SmallVector<char, 2> Masked;
11791 switch (State) {
11792 case OMPDeclareSimdDeclAttr::BS_Undefined:
11793 Masked.push_back('N');
11794 Masked.push_back('M');
11795 break;
11796 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11797 Masked.push_back('N');
11798 break;
11799 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11800 Masked.push_back('M');
11801 break;
11802 }
11803 for (char Mask : Masked) {
11804 for (const ISADataTy &Data : ISAData) {
11805 SmallString<256> Buffer;
11806 llvm::raw_svector_ostream Out(Buffer);
11807 Out << "_ZGV" << Data.ISA << Mask;
11808 if (!VLENVal) {
11809 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11810 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11811 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11812 } else {
11813 Out << VLENVal;
11814 }
11815 Out << mangleVectorParameters(ParamAttrs);
11816 Out << '_' << Fn->getName();
11817 Fn->addFnAttr(Out.str());
11818 }
11819 }
11820}
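// A worked example (editorial illustration): for
//   #pragma omp declare simd notinbranch
//   double bar(double x);
// with no simdlen clause, the loop above attaches "_ZGVbN2v_bar"
// (128 / 64 = 2 lanes for SSE), "_ZGVcN4v_bar", "_ZGVdN4v_bar" and
// "_ZGVeN8v_bar"; an inbranch variant would use 'M' in place of 'N'.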
11821
11822 // These are the functions needed to mangle the name of the
11823// vector functions generated by the compiler, according to the rules
11824// defined in the "Vector Function ABI specifications for AArch64",
11825// available at
11826// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11827
11828/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11829static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11830 QT = QT.getCanonicalType();
11831
11832 if (QT->isVoidType())
11833 return false;
11834
11835 if (Kind == ParamKindTy::Uniform)
11836 return false;
11837
11838 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
11839 return false;
11840
11841 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11842 !QT->isReferenceType())
11843 return false;
11844
11845 return true;
11846}
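// Editorial illustration: a reference parameter `int &r` marked
// `linear(val(r))` maps to vector (MTV is true), while a plain
// `linear(x)` on a non-reference `int x` does not.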
11847
11848/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11849 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11850 QT = QT.getCanonicalType();
11851 unsigned Size = C.getTypeSize(QT);
11852
11853 // Only scalars and complex types at most 16 bytes wide set PBV to true.
11854 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11855 return false;
11856
11857 if (QT->isFloatingType())
11858 return true;
11859
11860 if (QT->isIntegerType())
11861 return true;
11862
11863 if (QT->isPointerType())
11864 return true;
11865
11866 // TODO: Add support for complex types (section 3.1.2, item 2).
11867
11868 return false;
11869}
11870
11871/// Computes the lane size (LS) of a return type or of an input parameter,
11872/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11873/// TODO: Add support for references, section 3.2.1, item 1.
11874static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11875 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11876 QualType PTy = QT.getCanonicalType()->getPointeeType();
11877 if (getAArch64PBV(PTy, C))
11878 return C.getTypeSize(PTy);
11879 }
11880 if (getAArch64PBV(QT, C))
11881 return C.getTypeSize(QT);
11882
11883 return C.getTypeSize(C.getUIntPtrType());
11884}
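// Editorial illustration: for a uniform `double *p` the pointee is PBV,
// so LS(p) = 64, the pointee size; for a uniform pointer to a non-PBV
// struct, the pointer itself is PBV and its own size is used instead.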
11885
11886// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11887// signature of the scalar function, as defined in 3.2.2 of the
11888// AAVFABI.
11889static std::tuple<unsigned, unsigned, bool>
11890 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11891 QualType RetType = FD->getReturnType().getCanonicalType();
11892
11893 ASTContext &C = FD->getASTContext();
11894
11895 bool OutputBecomesInput = false;
11896
11897 llvm::SmallVector<unsigned, 8> Sizes;
11898 if (!RetType->isVoidType()) {
11899 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11900 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11901 OutputBecomesInput = true;
11902 }
11903 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11904 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11905 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11906 }
11907
11908 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11909 // The LS of a function parameter / return value can only be a power
11910 // of 2, starting from 8 bits, up to 128.
11911 assert(llvm::all_of(Sizes,
11912 [](unsigned Size) {
11913 return Size == 8 || Size == 16 || Size == 32 ||
11914 Size == 64 || Size == 128;
11915 }) &&
11916 "Invalid size");
11917
11918 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
11919 OutputBecomesInput);
11920}
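// Editorial illustration: for `double foo(float x, double y)` with both
// parameters classified as vector, the lane sizes are {64, 32, 64}, so
// getNDSWDS returns NDS = 32 and WDS = 64.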
11921
11922// Function used to add the attribute. The parameter `VLEN` is
11923// templated to allow the use of "x" when targeting scalable functions
11924// for SVE.
11925template <typename T>
11926static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11927 char ISA, StringRef ParSeq,
11928 StringRef MangledName, bool OutputBecomesInput,
11929 llvm::Function *Fn) {
11930 SmallString<256> Buffer;
11931 llvm::raw_svector_ostream Out(Buffer);
11932 Out << Prefix << ISA << LMask << VLEN;
11933 if (OutputBecomesInput)
11934 Out << "v";
11935 Out << ParSeq << "_" << MangledName;
11936 Fn->addFnAttr(Out.str());
11937}
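// Editorial illustration: VLEN = 2, LMask = "N", ISA = 'n' and
// ParSeq = "v" for a function "foo" produce the attribute
// "_ZGVnN2v_foo"; the templated VLEN lets SVE callers pass "x",
// as in "_ZGVsMxv_foo".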
11938
11939// Helper function to generate the Advanced SIMD names depending on
11940// the value of the NDS when simdlen is not present.
11941static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11942 StringRef Prefix, char ISA,
11943 StringRef ParSeq, StringRef MangledName,
11944 bool OutputBecomesInput,
11945 llvm::Function *Fn) {
11946 switch (NDS) {
11947 case 8:
11948 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11949 OutputBecomesInput, Fn);
11950 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11951 OutputBecomesInput, Fn);
11952 break;
11953 case 16:
11954 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11955 OutputBecomesInput, Fn);
11956 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11957 OutputBecomesInput, Fn);
11958 break;
11959 case 32:
11960 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11961 OutputBecomesInput, Fn);
11962 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11963 OutputBecomesInput, Fn);
11964 break;
11965 case 64:
11966 case 128:
11967 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11968 OutputBecomesInput, Fn);
11969 break;
11970 default:
11971 llvm_unreachable("Scalar type is too wide.");
11972 }
11973}
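// Editorial illustration: with NDS = 32 (e.g. a float signature), the
// switch above emits both VLEN = 2 and VLEN = 4, i.e. the 64-bit and
// 128-bit Advanced SIMD forms prescribed by section 3.3.1.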
11974
11975/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11976 static void emitAArch64DeclareSimdFunction(
11977 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11978 ArrayRef<ParamAttrTy> ParamAttrs,
11979 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11980 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11981
11982 // Get basic data for building the vector signature.
11983 const auto Data = getNDSWDS(FD, ParamAttrs);
11984 const unsigned NDS = std::get<0>(Data);
11985 const unsigned WDS = std::get<1>(Data);
11986 const bool OutputBecomesInput = std::get<2>(Data);
11987
11988 // Check the values provided via `simdlen` by the user.
11989 // 1. A `simdlen(1)` doesn't produce vector signatures,
11990 if (UserVLEN == 1) {
11991 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11992 DiagnosticsEngine::Warning,
11993 "The clause simdlen(1) has no effect when targeting aarch64.");
11994 CGM.getDiags().Report(SLoc, DiagID);
11995 return;
11996 }
11997
11998 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11999 // Advanced SIMD output.
12000 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
12001 unsigned DiagID = CGM.getDiags().getCustomDiagID(
12002 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
12003 "power of 2 when targeting Advanced SIMD.");
12004 CGM.getDiags().Report(SLoc, DiagID);
12005 return;
12006 }
12007
12008 // 3. Section 3.4.1. SVE fixed length must obey the architectural
12009 // limits.
12010 if (ISA == 's' && UserVLEN != 0) {
12011 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
12012 unsigned DiagID = CGM.getDiags().getCustomDiagID(
12013 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
12014 "lanes in the architectural constraints "
12015 "for SVE (min is 128-bit, max is "
12016 "2048-bit, by steps of 128-bit)");
12017 CGM.getDiags().Report(SLoc, DiagID) << WDS;
12018 return;
12019 }
12020 }
12021
12022 // Sort out parameter sequence.
12023 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
12024 StringRef Prefix = "_ZGV";
12025 // Generate simdlen from user input (if any).
12026 if (UserVLEN) {
12027 if (ISA == 's') {
12028 // SVE generates only a masked function.
12029 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12030 OutputBecomesInput, Fn);
12031 } else {
12032 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
12033 // Advanced SIMD generates one or two functions, depending on
12034 // the `[not]inbranch` clause.
12035 switch (State) {
12036 case OMPDeclareSimdDeclAttr::BS_Undefined:
12037 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
12038 OutputBecomesInput, Fn);
12039 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12040 OutputBecomesInput, Fn);
12041 break;
12042 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12043 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
12044 OutputBecomesInput, Fn);
12045 break;
12046 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12047 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
12048 OutputBecomesInput, Fn);
12049 break;
12050 }
12051 }
12052 } else {
12053 // If no user simdlen is provided, follow the AAVFABI rules for
12054 // generating the vector length.
12055 if (ISA == 's') {
12056 // SVE, section 3.4.1, item 1.
12057 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
12058 OutputBecomesInput, Fn);
12059 } else {
12060 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
12061 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
12062 // two vector names depending on the use of the clause
12063 // `[not]inbranch`.
12064 switch (State) {
12065 case OMPDeclareSimdDeclAttr::BS_Undefined:
12066 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
12067 OutputBecomesInput, Fn);
12068 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
12069 OutputBecomesInput, Fn);
12070 break;
12071 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
12072 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
12073 OutputBecomesInput, Fn);
12074 break;
12075 case OMPDeclareSimdDeclAttr::BS_Inbranch:
12076 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
12077 OutputBecomesInput, Fn);
12078 break;
12079 }
12080 }
12081 }
12082}
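// A worked example (editorial illustration): for
//   #pragma omp declare simd simdlen(4) notinbranch
//   float baz(float x);
// an Advanced SIMD target gets "_ZGVnN4v_baz"; with +sve and no simdlen,
// the scalable masked variant "_ZGVsMxv_baz" is emitted instead.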
12083
12084 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
12085 llvm::Function *Fn) {
12086 ASTContext &C = CGM.getContext();
12087 FD = FD->getMostRecentDecl();
12088 while (FD) {
12089 // Map params to their positions in function decl.
12090 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
12091 if (isa<CXXMethodDecl>(FD))
12092 ParamPositions.try_emplace(FD, 0);
12093 unsigned ParamPos = ParamPositions.size();
12094 for (const ParmVarDecl *P : FD->parameters()) {
12095 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
12096 ++ParamPos;
12097 }
12098 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
12099 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
12100 // Mark uniform parameters.
12101 for (const Expr *E : Attr->uniforms()) {
12102 E = E->IgnoreParenImpCasts();
12103 unsigned Pos;
12104 if (isa<CXXThisExpr>(E)) {
12105 Pos = ParamPositions[FD];
12106 } else {
12107 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12108 ->getCanonicalDecl();
12109 auto It = ParamPositions.find(PVD);
12110 assert(It != ParamPositions.end() && "Function parameter not found");
12111 Pos = It->second;
12112 }
12113 ParamAttrs[Pos].Kind = Uniform;
12114 }
12115 // Get alignment info.
12116 auto *NI = Attr->alignments_begin();
12117 for (const Expr *E : Attr->aligneds()) {
12118 E = E->IgnoreParenImpCasts();
12119 unsigned Pos;
12120 QualType ParmTy;
12121 if (isa<CXXThisExpr>(E)) {
12122 Pos = ParamPositions[FD];
12123 ParmTy = E->getType();
12124 } else {
12125 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12126 ->getCanonicalDecl();
12127 auto It = ParamPositions.find(PVD);
12128 assert(It != ParamPositions.end() && "Function parameter not found");
12129 Pos = It->second;
12130 ParmTy = PVD->getType();
12131 }
12132 ParamAttrs[Pos].Alignment =
12133 (*NI)
12134 ? (*NI)->EvaluateKnownConstInt(C)
12135 : llvm::APSInt::getUnsigned(
12136 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
12137 .getQuantity());
12138 ++NI;
12139 }
12140 // Mark linear parameters.
12141 auto *SI = Attr->steps_begin();
12142 auto *MI = Attr->modifiers_begin();
12143 for (const Expr *E : Attr->linears()) {
12144 E = E->IgnoreParenImpCasts();
12145 unsigned Pos;
12146 bool IsReferenceType = false;
12147 // Rescaling factor needed to compute the linear parameter
12148 // value in the mangled name.
12149 unsigned PtrRescalingFactor = 1;
12150 if (isa<CXXThisExpr>(E)) {
12151 Pos = ParamPositions[FD];
12152 auto *P = cast<PointerType>(E->getType());
12153 PtrRescalingFactor = CGM.getContext()
12154 .getTypeSizeInChars(P->getPointeeType())
12155 .getQuantity();
12156 } else {
12157 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12158 ->getCanonicalDecl();
12159 auto It = ParamPositions.find(PVD);
12160 assert(It != ParamPositions.end() && "Function parameter not found");
12161 Pos = It->second;
12162 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12163 PtrRescalingFactor = CGM.getContext()
12164 .getTypeSizeInChars(P->getPointeeType())
12165 .getQuantity();
12166 else if (PVD->getType()->isReferenceType()) {
12167 IsReferenceType = true;
12168 PtrRescalingFactor =
12169 CGM.getContext()
12170 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
12171 .getQuantity();
12172 }
12173 }
12174 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12175 if (*MI == OMPC_LINEAR_ref)
12176 ParamAttr.Kind = LinearRef;
12177 else if (*MI == OMPC_LINEAR_uval)
12178 ParamAttr.Kind = LinearUVal;
12179 else if (IsReferenceType)
12180 ParamAttr.Kind = LinearVal;
12181 else
12182 ParamAttr.Kind = Linear;
12183 // Assuming a stride of 1, for `linear` without modifiers.
12184 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12185 if (*SI) {
12186 Expr::EvalResult Result;
12187 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12188 if (const auto *DRE =
12189 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12190 if (const auto *StridePVD =
12191 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12192 ParamAttr.HasVarStride = true;
12193 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
12194 assert(It != ParamPositions.end() &&
12195 "Function parameter not found");
12196 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
12197 }
12198 }
12199 } else {
12200 ParamAttr.StrideOrArg = Result.Val.getInt();
12201 }
12202 }
12203 // If we are using a linear clause on a pointer, we need to
12204 // rescale the value of linear_step with the byte size of the
12205 // pointee type.
12206 if (!ParamAttr.HasVarStride &&
12207 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
12208 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12209 ++SI;
12210 ++MI;
12211 }
12212 llvm::APSInt VLENVal;
12213 SourceLocation ExprLoc;
12214 const Expr *VLENExpr = Attr->getSimdlen();
12215 if (VLENExpr) {
12216 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12217 ExprLoc = VLENExpr->getExprLoc();
12218 }
12219 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12220 if (CGM.getTriple().isX86()) {
12221 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12222 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12223 unsigned VLEN = VLENVal.getExtValue();
12224 StringRef MangledName = Fn->getName();
12225 if (CGM.getTarget().hasFeature("sve"))
12226 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12227 MangledName, 's', 128, Fn, ExprLoc);
12228 else if (CGM.getTarget().hasFeature("neon"))
12229 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12230 MangledName, 'n', 128, Fn, ExprLoc);
12231 }
12232 }
12233 FD = FD->getPreviousDecl();
12234 }
12235}
12236
12237namespace {
12238/// Cleanup action for doacross support.
12239class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12240public:
12241 static const int DoacrossFinArgs = 2;
12242
12243private:
12244 llvm::FunctionCallee RTLFn;
12245 llvm::Value *Args[DoacrossFinArgs];
12246
12247public:
12248 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12249 ArrayRef<llvm::Value *> CallArgs)
12250 : RTLFn(RTLFn) {
12251 assert(CallArgs.size() == DoacrossFinArgs);
12252 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12253 }
12254 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12255 if (!CGF.HaveInsertPoint())
12256 return;
12257 CGF.EmitRuntimeCall(RTLFn, Args);
12258 }
12259};
12260} // namespace
12261
12262 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12263 const OMPLoopDirective &D,
12264 ArrayRef<Expr *> NumIterations) {
12265 if (!CGF.HaveInsertPoint())
12266 return;
12267
12268 ASTContext &C = CGM.getContext();
12269 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12270 RecordDecl *RD;
12271 if (KmpDimTy.isNull()) {
12272 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
12273 // kmp_int64 lo; // lower
12274 // kmp_int64 up; // upper
12275 // kmp_int64 st; // stride
12276 // };
12277 RD = C.buildImplicitRecord("kmp_dim");
12278 RD->startDefinition();
12279 addFieldToRecordDecl(C, RD, Int64Ty);
12280 addFieldToRecordDecl(C, RD, Int64Ty);
12281 addFieldToRecordDecl(C, RD, Int64Ty);
12282 RD->completeDefinition();
12283 KmpDimTy = C.getCanonicalTagType(RD);
12284 } else {
12285 RD = KmpDimTy->castAsRecordDecl();
12286 }
12287 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12288 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
12289 ArraySizeModifier::Normal, 0);
12290
12290
12291 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12292 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12293 enum { LowerFD = 0, UpperFD, StrideFD };
12294 // Fill dims with data.
12295 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12296 LValue DimsLVal = CGF.MakeAddrLValue(
12297 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12298 // dims.upper = num_iterations;
12299 LValue UpperLVal = CGF.EmitLValueForField(
12300 DimsLVal, *std::next(RD->field_begin(), UpperFD));
12301 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12302 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12303 Int64Ty, NumIterations[I]->getExprLoc());
12304 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12305 // dims.stride = 1;
12306 LValue StrideLVal = CGF.EmitLValueForField(
12307 DimsLVal, *std::next(RD->field_begin(), StrideFD));
12308 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12309 StrideLVal);
12310 }
12311
12312 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12313 // kmp_int32 num_dims, struct kmp_dim * dims);
12314 llvm::Value *Args[] = {
12315 emitUpdateLocation(CGF, D.getBeginLoc()),
12316 getThreadID(CGF, D.getBeginLoc()),
12317 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12318 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12319 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
12320 CGM.VoidPtrTy)};
12321
12322 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12323 CGM.getModule(), OMPRTL___kmpc_doacross_init);
12324 CGF.EmitRuntimeCall(RTLFn, Args);
12325 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12326 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12327 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12328 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12329 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12330 llvm::ArrayRef(FiniArgs));
12331}
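// A worked example (editorial illustration): for
//   #pragma omp for ordered(2)
// NumIterations has two entries, so a two-element kmp_dim array is filled
// with each dims[i].up set to the loop trip count and dims[i].st set to 1,
// followed by
//   __kmpc_doacross_init(&loc, gtid, /*num_dims=*/2, dims);
// with __kmpc_doacross_fini registered as a normal-and-EH cleanup.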
12332
12333template <typename T>
12334 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
12335 const T *C, llvm::Value *ULoc,
12336 llvm::Value *ThreadID) {
12337 QualType Int64Ty =
12338 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12339 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12340 QualType ArrayTy = CGM.getContext().getConstantArrayType(
12341 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
12342 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12343 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12344 const Expr *CounterVal = C->getLoopData(I);
12345 assert(CounterVal);
12346 llvm::Value *CntVal = CGF.EmitScalarConversion(
12347 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12348 CounterVal->getExprLoc());
12349 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12350 /*Volatile=*/false, Int64Ty);
12351 }
12352 llvm::Value *Args[] = {
12353 ULoc, ThreadID,
12354 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
12355 llvm::FunctionCallee RTLFn;
12356 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
12357 OMPDoacrossKind<T> ODK;
12358 if (ODK.isSource(C)) {
12359 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12360 OMPRTL___kmpc_doacross_post);
12361 } else {
12362 assert(ODK.isSink(C) && "Expect sink modifier.");
12363 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12364 OMPRTL___kmpc_doacross_wait);
12365 }
12366 CGF.EmitRuntimeCall(RTLFn, Args);
12367}
12368
12369 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12370 const OMPDependClause *C) {
12371 return EmitDoacrossOrdered<OMPDependClause>(
12372 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12373 getThreadID(CGF, C->getBeginLoc()));
12374}
12375
12376 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12377 const OMPDoacrossClause *C) {
12378 return EmitDoacrossOrdered<OMPDoacrossClause>(
12379 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
12380 getThreadID(CGF, C->getBeginLoc()));
12381}
12382
12383 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12384 llvm::FunctionCallee Callee,
12385 ArrayRef<llvm::Value *> Args) const {
12386 assert(Loc.isValid() && "Outlined function call location must be valid.");
12387 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12388
12389 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12390 if (Fn->doesNotThrow()) {
12391 CGF.EmitNounwindRuntimeCall(Fn, Args);
12392 return;
12393 }
12394 }
12395 CGF.EmitRuntimeCall(Callee, Args);
12396}
12397
12398 void CGOpenMPRuntime::emitOutlinedFunctionCall(
12399 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12400 ArrayRef<llvm::Value *> Args) const {
12401 emitCall(CGF, Loc, OutlinedFn, Args);
12402}
12403
12404 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12405 if (const auto *FD = dyn_cast<FunctionDecl>(D))
12406 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12407 HasEmittedDeclareTargetRegion = true;
12408}
12409
12410 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12411 const VarDecl *NativeParam,
12412 const VarDecl *TargetParam) const {
12413 return CGF.GetAddrOfLocalVar(NativeParam);
12414}
12415
12416/// Return allocator value from expression, or return a null allocator (default
12417/// when no allocator specified).
12418static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12419 const Expr *Allocator) {
12420 llvm::Value *AllocVal;
12421 if (Allocator) {
12422 AllocVal = CGF.EmitScalarExpr(Allocator);
12423 // According to the standard, the original allocator type is an enum
12424 // (integer). Convert to pointer type, if required.
12425 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12426 CGF.getContext().VoidPtrTy,
12427 Allocator->getExprLoc());
12428 } else {
12429 // If no allocator specified, it defaults to the null allocator.
12430 AllocVal = llvm::Constant::getNullValue(
12431 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12432 }
12433 return AllocVal;
12434}
12435
12436/// Return the alignment from an allocate directive if present.
12437static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12438 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12439
12440 if (!AllocateAlignment)
12441 return nullptr;
12442
12443 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
12444}
12445
12446 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12447 const VarDecl *VD) {
12448 if (!VD)
12449 return Address::invalid();
12450 Address UntiedAddr = Address::invalid();
12451 Address UntiedRealAddr = Address::invalid();
12452 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12453 if (It != FunctionToUntiedTaskStackMap.end()) {
12454 const UntiedLocalVarsAddressesMap &UntiedData =
12455 UntiedLocalVarsStack[It->second];
12456 auto I = UntiedData.find(VD);
12457 if (I != UntiedData.end()) {
12458 UntiedAddr = I->second.first;
12459 UntiedRealAddr = I->second.second;
12460 }
12461 }
12462 const VarDecl *CVD = VD->getCanonicalDecl();
12463 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12464 // Use the default allocation.
12465 if (!isAllocatableDecl(VD))
12466 return UntiedAddr;
12467 llvm::Value *Size;
12468 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12469 if (CVD->getType()->isVariablyModifiedType()) {
12470 Size = CGF.getTypeSize(CVD->getType());
12471 // Align the size: ((size + align - 1) / align) * align
12472 Size = CGF.Builder.CreateNUWAdd(
12473 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12474 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12475 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12476 } else {
12477 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12478 Size = CGM.getSize(Sz.alignTo(Align));
12479 }
12480 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12481 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12482 const Expr *Allocator = AA->getAllocator();
12483 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12484 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
12485 llvm::SmallVector<llvm::Value *, 4> Args;
12486 Args.push_back(ThreadID);
12487 if (Alignment)
12488 Args.push_back(Alignment);
12489 Args.push_back(Size);
12490 Args.push_back(AllocVal);
12491 llvm::omp::RuntimeFunction FnID =
12492 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12493 llvm::Value *Addr = CGF.EmitRuntimeCall(
12494 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12495 getName({CVD->getName(), ".void.addr"}));
12496 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12497 CGM.getModule(), OMPRTL___kmpc_free);
12498 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12499 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12500 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12501 if (UntiedAddr.isValid())
12502 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12503
12504 // Cleanup action for allocate support.
12505 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12506 llvm::FunctionCallee RTLFn;
12507 SourceLocation::UIntTy LocEncoding;
12508 Address Addr;
12509 const Expr *AllocExpr;
12510
12511 public:
12512 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12513 SourceLocation::UIntTy LocEncoding, Address Addr,
12514 const Expr *AllocExpr)
12515 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12516 AllocExpr(AllocExpr) {}
12517 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12518 if (!CGF.HaveInsertPoint())
12519 return;
12520 llvm::Value *Args[3];
12521 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12522 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12523 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12524 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
12525 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12526 Args[2] = AllocVal;
12527 CGF.EmitRuntimeCall(RTLFn, Args);
12528 }
12529 };
12530 Address VDAddr =
12531 UntiedRealAddr.isValid()
12532 ? UntiedRealAddr
12533 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12534 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12535 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12536 VDAddr, Allocator);
12537 if (UntiedRealAddr.isValid())
12538 if (auto *Region =
12539 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12540 Region->emitUntiedSwitch(CGF);
12541 return VDAddr;
12542 }
12543 return UntiedAddr;
12544}
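// A worked example (editorial illustration): for
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
// the variable is backed by a call such as
//   __kmpc_alloc(gtid, sizeof(int), allocator)
// (or __kmpc_aligned_alloc when an align clause is present), and the
// matching __kmpc_free is pushed as a cleanup for scope exit.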
12545
12546 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12547 const VarDecl *VD) const {
12548 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12549 if (It == FunctionToUntiedTaskStackMap.end())
12550 return false;
12551 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12552}
12553
12554 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12555 CodeGenModule &CGM, const OMPLoopDirective &S)
12556 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12557 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12558 if (!NeedToPush)
12559 return;
12560 NontemporalDeclsSet &DS =
12561 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12562 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12563 for (const Stmt *Ref : C->private_refs()) {
12564 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12565 const ValueDecl *VD;
12566 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12567 VD = DRE->getDecl();
12568 } else {
12569 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12570 assert((ME->isImplicitCXXThis() ||
12571 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12572 "Expected member of current class.");
12573 VD = ME->getMemberDecl();
12574 }
12575 DS.insert(VD);
12576 }
12577 }
12578}
12579
12580 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12581 if (!NeedToPush)
12582 return;
12583 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12584}
12585
12586 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12587 CodeGenFunction &CGF,
12588 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12589 std::pair<Address, Address>> &LocalVars)
12590 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12591 if (!NeedToPush)
12592 return;
12593 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12594 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12595 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12596}
12597
12598 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12599 if (!NeedToPush)
12600 return;
12601 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12602}
12603
12604 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12605 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12606
12607 return llvm::any_of(
12608 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12609 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12610}
12611
12612void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12613 const OMPExecutableDirective &S,
12614 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12615 const {
12616 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12617 // Vars in target/task regions must be excluded completely.
12618 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12619 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12620 llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12621 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12622 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12623 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12624 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12625 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12626 }
12627 }
12628 // Exclude vars in private clauses.
12629 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12630 for (const Expr *Ref : C->varlist()) {
12631 if (!Ref->getType()->isScalarType())
12632 continue;
12633 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12634 if (!DRE)
12635 continue;
12636 NeedToCheckForLPCs.insert(DRE->getDecl());
12637 }
12638 }
12639 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12640 for (const Expr *Ref : C->varlist()) {
12641 if (!Ref->getType()->isScalarType())
12642 continue;
12643 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12644 if (!DRE)
12645 continue;
12646 NeedToCheckForLPCs.insert(DRE->getDecl());
12647 }
12648 }
12649 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12650 for (const Expr *Ref : C->varlist()) {
12651 if (!Ref->getType()->isScalarType())
12652 continue;
12653 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12654 if (!DRE)
12655 continue;
12656 NeedToCheckForLPCs.insert(DRE->getDecl());
12657 }
12658 }
12659 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12660 for (const Expr *Ref : C->varlist()) {
12661 if (!Ref->getType()->isScalarType())
12662 continue;
12663 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12664 if (!DRE)
12665 continue;
12666 NeedToCheckForLPCs.insert(DRE->getDecl());
12667 }
12668 }
12669 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12670 for (const Expr *Ref : C->varlist()) {
12671 if (!Ref->getType()->isScalarType())
12672 continue;
12673 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12674 if (!DRE)
12675 continue;
12676 NeedToCheckForLPCs.insert(DRE->getDecl());
12677 }
12678 }
12679 for (const Decl *VD : NeedToCheckForLPCs) {
12680 for (const LastprivateConditionalData &Data :
12681 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12682 if (Data.DeclToUniqueName.count(VD) > 0) {
12683 if (!Data.Disabled)
12684 NeedToAddForLPCsAsDisabled.insert(VD);
12685 break;
12686 }
12687 }
12688 }
12689}
12690
12691CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12692 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12693 : CGM(CGF.CGM),
12694 Action((CGM.getLangOpts().OpenMP >= 50 &&
12695 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12696 [](const OMPLastprivateClause *C) {
12697 return C->getKind() ==
12698 OMPC_LASTPRIVATE_conditional;
12699 }))
12700 ? ActionToDo::PushAsLastprivateConditional
12701 : ActionToDo::DoNotPush) {
12702 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12703 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12704 return;
12705 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12706 "Expected a push action.");
12708 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12709 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12710 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12711 continue;
12712
12713 for (const Expr *Ref : C->varlist()) {
12714 Data.DeclToUniqueName.insert(std::make_pair(
12715 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12716 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12717 }
12718 }
12719 Data.IVLVal = IVLVal;
12720 Data.Fn = CGF.CurFn;
12721}
12722
12723CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12724 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12725 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12726 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12727 if (CGM.getLangOpts().OpenMP < 50)
12728 return;
12729 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12730 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12731 if (!NeedToAddForLPCsAsDisabled.empty()) {
12732 Action = ActionToDo::DisableLastprivateConditional;
12733 LastprivateConditionalData &Data =
12734 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12735 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12736 Data.DeclToUniqueName.try_emplace(VD);
12737 Data.Fn = CGF.CurFn;
12738 Data.Disabled = true;
12739 }
12740}
12741
12742CGOpenMPRuntime::LastprivateConditionalRAII
12743 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12744 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12745 return LastprivateConditionalRAII(CGF, S);
12746}
12747
12748 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12749 if (CGM.getLangOpts().OpenMP < 50)
12750 return;
12751 if (Action == ActionToDo::DisableLastprivateConditional) {
12752 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12753 "Expected list of disabled private vars.");
12754 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12755 }
12756 if (Action == ActionToDo::PushAsLastprivateConditional) {
12757 assert(
12758 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12759 "Expected list of lastprivate conditional vars.");
12760 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12761 }
12762}
12763
12764 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12765 const VarDecl *VD) {
12766 ASTContext &C = CGM.getContext();
12767 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12768 QualType NewType;
12769 const FieldDecl *VDField;
12770 const FieldDecl *FiredField;
12771 LValue BaseLVal;
12772 auto VI = I->getSecond().find(VD);
12773 if (VI == I->getSecond().end()) {
12774 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12775 RD->startDefinition();
12776 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12777 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12778 RD->completeDefinition();
12779 NewType = C.getCanonicalTagType(RD);
12780 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12781 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12782 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12783 } else {
12784 NewType = std::get<0>(VI->getSecond());
12785 VDField = std::get<1>(VI->getSecond());
12786 FiredField = std::get<2>(VI->getSecond());
12787 BaseLVal = std::get<3>(VI->getSecond());
12788 }
12789 LValue FiredLVal =
12790 CGF.EmitLValueForField(BaseLVal, FiredField);
12791 CGF.EmitStoreOfScalar(
12792 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12793 FiredLVal);
12794 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
12795}
12796
12797namespace {
12798/// Checks if the lastprivate conditional variable is referenced in LHS.
12799class LastprivateConditionalRefChecker final
12800 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12801 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12802 const Expr *FoundE = nullptr;
12803 const Decl *FoundD = nullptr;
12804 StringRef UniqueDeclName;
12805 LValue IVLVal;
12806 llvm::Function *FoundFn = nullptr;
12807 SourceLocation Loc;
12808
12809public:
12810 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12811 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12812 llvm::reverse(LPM)) {
12813 auto It = D.DeclToUniqueName.find(E->getDecl());
12814 if (It == D.DeclToUniqueName.end())
12815 continue;
12816 if (D.Disabled)
12817 return false;
12818 FoundE = E;
12819 FoundD = E->getDecl()->getCanonicalDecl();
12820 UniqueDeclName = It->second;
12821 IVLVal = D.IVLVal;
12822 FoundFn = D.Fn;
12823 break;
12824 }
12825 return FoundE == E;
12826 }
12827 bool VisitMemberExpr(const MemberExpr *E) {
12828 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12829 return false;
12830 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12831 llvm::reverse(LPM)) {
12832 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12833 if (It == D.DeclToUniqueName.end())
12834 continue;
12835 if (D.Disabled)
12836 return false;
12837 FoundE = E;
12838 FoundD = E->getMemberDecl()->getCanonicalDecl();
12839 UniqueDeclName = It->second;
12840 IVLVal = D.IVLVal;
12841 FoundFn = D.Fn;
12842 break;
12843 }
12844 return FoundE == E;
12845 }
12846 bool VisitStmt(const Stmt *S) {
12847 for (const Stmt *Child : S->children()) {
12848 if (!Child)
12849 continue;
12850 if (const auto *E = dyn_cast<Expr>(Child))
12851 if (!E->isGLValue())
12852 continue;
12853 if (Visit(Child))
12854 return true;
12855 }
12856 return false;
12857 }
12858 explicit LastprivateConditionalRefChecker(
12859 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12860 : LPM(LPM) {}
12861 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12862 getFoundData() const {
12863 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12864 }
12865};
12866} // namespace
12867
12868 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12869 LValue IVLVal,
12870 StringRef UniqueDeclName,
12871 LValue LVal,
12872 SourceLocation Loc) {
12873 // Last updated loop counter for the lastprivate conditional var.
12874 // int<xx> last_iv = 0;
12875 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12876 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12877 LLIVTy, getName({UniqueDeclName, "iv"}));
12878 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12879 IVLVal.getAlignment().getAsAlign());
12880 LValue LastIVLVal =
12881 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
12882
12883 // Last value of the lastprivate conditional.
12884 // decltype(priv_a) last_a;
12885 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12886 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12887 cast<llvm::GlobalVariable>(Last)->setAlignment(
12888 LVal.getAlignment().getAsAlign());
12889 LValue LastLVal =
12890 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12891
12892 // Global loop counter. Required to handle inner parallel-for regions.
12893 // iv
12894 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12895
12896 // #pragma omp critical(a)
12897 // if (last_iv <= iv) {
12898 // last_iv = iv;
12899 // last_a = priv_a;
12900 // }
12901 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12902 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12903 Action.Enter(CGF);
12904 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12905 // (last_iv <= iv) ? Check if the variable is updated and store new
12906 // value in global var.
12907 llvm::Value *CmpRes;
12908 if (IVLVal.getType()->isSignedIntegerType()) {
12909 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12910 } else {
12911 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12912 "Loop iteration variable must be integer.");
12913 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12914 }
12915 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12916 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12917 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12918 // {
12919 CGF.EmitBlock(ThenBB);
12920
12921 // last_iv = iv;
12922 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12923
12924 // last_a = priv_a;
12925 switch (CGF.getEvaluationKind(LVal.getType())) {
12926 case TEK_Scalar: {
12927 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12928 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12929 break;
12930 }
12931 case TEK_Complex: {
12932 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12933 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12934 break;
12935 }
12936 case TEK_Aggregate:
12937 llvm_unreachable(
12938 "Aggregates are not supported in lastprivate conditional.");
12939 }
12940 // }
12941 CGF.EmitBranch(ExitBB);
12942 // There is no need to emit line number for unconditional branch.
12943 (void)ApplyDebugLocation::CreateEmpty(CGF);
12944 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12945 };
12946
12947 if (CGM.getLangOpts().OpenMPSimd) {
12948 // Do not emit as a critical region as no parallel region could be emitted.
12949 RegionCodeGenTy ThenRCG(CodeGen);
12950 ThenRCG(CGF);
12951 } else {
12952 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12953 }
12954}
12955
12956 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12957 const Expr *LHS) {
12958 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12959 return;
12960 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12961 if (!Checker.Visit(LHS))
12962 return;
12963 const Expr *FoundE;
12964 const Decl *FoundD;
12965 StringRef UniqueDeclName;
12966 LValue IVLVal;
12967 llvm::Function *FoundFn;
12968 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12969 Checker.getFoundData();
12970 if (FoundFn != CGF.CurFn) {
12971 // Special codegen for inner parallel regions.
12972 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12973 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12974 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12975 "Lastprivate conditional is not found in outer region.");
12976 QualType StructTy = std::get<0>(It->getSecond());
12977 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12978 LValue PrivLVal = CGF.EmitLValue(FoundE);
12979 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12980 PrivLVal.getAddress(),
12981 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12982 CGF.ConvertTypeForMem(StructTy));
12983 LValue BaseLVal =
12984 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12985 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12986 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12987 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12988 FiredLVal, llvm::AtomicOrdering::Unordered,
12989 /*IsVolatile=*/true, /*isInit=*/false);
12990 return;
12991 }
12992
12993 // Private address of the lastprivate conditional in the current context.
12994 // priv_a
12995 LValue LVal = CGF.EmitLValue(FoundE);
12996 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12997 FoundE->getExprLoc());
12998}
12999
13000 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
13001 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13002 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
13003 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
13004 return;
13005 auto Range = llvm::reverse(LastprivateConditionalStack);
13006 auto It = llvm::find_if(
13007 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
13008 if (It == Range.end() || It->Fn != CGF.CurFn)
13009 return;
13010 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
13011 assert(LPCI != LastprivateConditionalToTypes.end() &&
13012 "Lastprivates must be registered already.");
13014 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
13015 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
13016 for (const auto &Pair : It->DeclToUniqueName) {
13017 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
13018 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
13019 continue;
13020 auto I = LPCI->getSecond().find(Pair.first);
13021 assert(I != LPCI->getSecond().end() &&
13022 "Lastprivate must be rehistered already.");
13023 // bool Cmp = priv_a.Fired != 0;
13024 LValue BaseLVal = std::get<3>(I->getSecond());
13025 LValue FiredLVal =
13026 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
13027 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
13028 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
13029 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
13030 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
13031 // if (Cmp) {
13032 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
13033 CGF.EmitBlock(ThenBB);
13034 Address Addr = CGF.GetAddrOfLocalVar(VD);
13035 LValue LVal;
13036 if (VD->getType()->isReferenceType())
13037 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
13038 AlignmentSource::Decl);
13039 else
13040 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
13041 AlignmentSource::Decl);
13042 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
13043 D.getBeginLoc());
13044 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
13045 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
13046 // }
13047 }
13048}
13049
13050 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
13051 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
13052 SourceLocation Loc) {
13053 if (CGF.getLangOpts().OpenMP < 50)
13054 return;
13055 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
13056 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
13057 "Unknown lastprivate conditional variable.");
13058 StringRef UniqueName = It->second;
13059 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
13060 // The variable was not updated in the region - exit.
13061 if (!GV)
13062 return;
13063 LValue LPLVal = CGF.MakeRawAddrLValue(
13064 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
13065 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
13066 CGF.EmitStoreOfScalar(Res, PrivLVal);
13067}
13068
13069 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
13070 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13071 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13072 const RegionCodeGenTy &CodeGen) {
13073 llvm_unreachable("Not supported in SIMD-only mode");
13074}
13075
13076 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
13077 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13078 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
13079 const RegionCodeGenTy &CodeGen) {
13080 llvm_unreachable("Not supported in SIMD-only mode");
13081}
13082
13083 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
13084 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
13085 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
13086 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
13087 bool Tied, unsigned &NumberOfParts) {
13088 llvm_unreachable("Not supported in SIMD-only mode");
13089}
13090
13091 void CGOpenMPSIMDRuntime::emitParallelCall(
13092 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
13093 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
13094 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
13095 OpenMPSeverityClauseKind Severity, const Expr *Message) {
13096 llvm_unreachable("Not supported in SIMD-only mode");
13097}
13098
13099 void CGOpenMPSIMDRuntime::emitCriticalRegion(
13100 CodeGenFunction &CGF, StringRef CriticalName,
13101 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
13102 const Expr *Hint) {
13103 llvm_unreachable("Not supported in SIMD-only mode");
13104}
13105
13106 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
13107 const RegionCodeGenTy &MasterOpGen,
13108 SourceLocation Loc) {
13109 llvm_unreachable("Not supported in SIMD-only mode");
13110}
13111
13112 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
13113 const RegionCodeGenTy &MasterOpGen,
13114 SourceLocation Loc,
13115 const Expr *Filter) {
13116 llvm_unreachable("Not supported in SIMD-only mode");
13117}
13118
13119 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
13120 SourceLocation Loc) {
13121 llvm_unreachable("Not supported in SIMD-only mode");
13122}
13123
13124 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
13125 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
13126 SourceLocation Loc) {
13127 llvm_unreachable("Not supported in SIMD-only mode");
13128}
13129
13130 void CGOpenMPSIMDRuntime::emitSingleRegion(
13131 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
13132 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
13133 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
13134 ArrayRef<const Expr *> AssignmentOps) {
13135 llvm_unreachable("Not supported in SIMD-only mode");
13136}
13137
13138 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
13139 const RegionCodeGenTy &OrderedOpGen,
13140 SourceLocation Loc,
13141 bool IsThreads) {
13142 llvm_unreachable("Not supported in SIMD-only mode");
13143}
13144
13145 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
13146 SourceLocation Loc,
13147 OpenMPDirectiveKind Kind,
13148 bool EmitChecks,
13149 bool ForceSimpleCall) {
13150 llvm_unreachable("Not supported in SIMD-only mode");
13151}
13152
13153 void CGOpenMPSIMDRuntime::emitForDispatchInit(
13154 CodeGenFunction &CGF, SourceLocation Loc,
13155 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
13156 bool Ordered, const DispatchRTInput &DispatchValues) {
13157 llvm_unreachable("Not supported in SIMD-only mode");
13158}
13159
13160 void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
13161 SourceLocation Loc) {
13162 llvm_unreachable("Not supported in SIMD-only mode");
13163}
13164
13165 void CGOpenMPSIMDRuntime::emitForStaticInit(
13166 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
13167 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
13168 llvm_unreachable("Not supported in SIMD-only mode");
13169}
13170
13171 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
13172 CodeGenFunction &CGF, SourceLocation Loc,
13173 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
13174 llvm_unreachable("Not supported in SIMD-only mode");
13175}
13176
13177 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
13178 SourceLocation Loc,
13179 unsigned IVSize,
13180 bool IVSigned) {
13181 llvm_unreachable("Not supported in SIMD-only mode");
13182}
13183
13184 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
13185 SourceLocation Loc,
13186 OpenMPDirectiveKind DKind) {
13187 llvm_unreachable("Not supported in SIMD-only mode");
13188}
13189
13190 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
13191 SourceLocation Loc,
13192 unsigned IVSize, bool IVSigned,
13193 Address IL, Address LB,
13194 Address UB, Address ST) {
13195 llvm_unreachable("Not supported in SIMD-only mode");
13196}
13197
13198 void CGOpenMPSIMDRuntime::emitNumThreadsClause(
13199 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
13200 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
13201 SourceLocation SeverityLoc, const Expr *Message,
13202 SourceLocation MessageLoc) {
13203 llvm_unreachable("Not supported in SIMD-only mode");
13204}
13205
13206 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
13207 ProcBindKind ProcBind,
13208 SourceLocation Loc) {
13209 llvm_unreachable("Not supported in SIMD-only mode");
13210}
13211
13212 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
13213 const VarDecl *VD,
13214 Address VDAddr,
13215 SourceLocation Loc) {
13216 llvm_unreachable("Not supported in SIMD-only mode");
13217}
13218
13219 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
13220 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13221 CodeGenFunction *CGF) {
13222 llvm_unreachable("Not supported in SIMD-only mode");
13223}
13224
13225 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
13226 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13227 llvm_unreachable("Not supported in SIMD-only mode");
13228}
13229
13230 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
13231 ArrayRef<const Expr *> Vars,
13232 SourceLocation Loc,
13233 llvm::AtomicOrdering AO) {
13234 llvm_unreachable("Not supported in SIMD-only mode");
13235}
13236
13237 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
13238 const OMPExecutableDirective &D,
13239 llvm::Function *TaskFunction,
13240 QualType SharedsTy, Address Shareds,
13241 const Expr *IfCond,
13242 const OMPTaskDataTy &Data) {
13243 llvm_unreachable("Not supported in SIMD-only mode");
13244}
13245
13246 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
13247 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
13248 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13249 const Expr *IfCond, const OMPTaskDataTy &Data) {
13250 llvm_unreachable("Not supported in SIMD-only mode");
13251}
13252
13253 void CGOpenMPSIMDRuntime::emitReduction(
13254 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
13255 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
13256 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13257 assert(Options.SimpleReduction && "Only simple reduction is expected.");
13258 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13259 ReductionOps, Options);
13260}
13261
13262 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
13263 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
13264 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13265 llvm_unreachable("Not supported in SIMD-only mode");
13266}
13267
13268 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
13269 SourceLocation Loc,
13270 bool IsWorksharingReduction) {
13271 llvm_unreachable("Not supported in SIMD-only mode");
13272}
13273
13274 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
13275 SourceLocation Loc,
13276 ReductionCodeGen &RCG,
13277 unsigned N) {
13278 llvm_unreachable("Not supported in SIMD-only mode");
13279}
13280
13281 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
13282 SourceLocation Loc,
13283 llvm::Value *ReductionsPtr,
13284 LValue SharedLVal) {
13285 llvm_unreachable("Not supported in SIMD-only mode");
13286}
13287
13288 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
13289 SourceLocation Loc,
13290 const OMPTaskDataTy &Data) {
13291 llvm_unreachable("Not supported in SIMD-only mode");
13292}
13293
13294 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
13295 CodeGenFunction &CGF, SourceLocation Loc,
13296 OpenMPDirectiveKind CancelRegion) {
13297 llvm_unreachable("Not supported in SIMD-only mode");
13298}
13299
13300 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
13301 SourceLocation Loc, const Expr *IfCond,
13302 OpenMPDirectiveKind CancelRegion) {
13303 llvm_unreachable("Not supported in SIMD-only mode");
13304}
13305
13306 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
13307 const OMPExecutableDirective &D, StringRef ParentName,
13308 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13309 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13310 llvm_unreachable("Not supported in SIMD-only mode");
13311}
13312
13313 void CGOpenMPSIMDRuntime::emitTargetCall(
13314 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13315 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13316 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13317 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13318 const OMPLoopDirective &D)>
13319 SizeEmitter) {
13320 llvm_unreachable("Not supported in SIMD-only mode");
13321}
13322
13323 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13324 llvm_unreachable("Not supported in SIMD-only mode");
13325}
13326
13327 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13328 llvm_unreachable("Not supported in SIMD-only mode");
13329}
13330
13331 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13332 return false;
13333}
13334
13335 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13336 const OMPExecutableDirective &D,
13337 SourceLocation Loc,
13338 llvm::Function *OutlinedFn,
13339 ArrayRef<llvm::Value *> CapturedVars) {
13340 llvm_unreachable("Not supported in SIMD-only mode");
13341}
13342
13343 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13344 const Expr *NumTeams,
13345 const Expr *ThreadLimit,
13346 SourceLocation Loc) {
13347 llvm_unreachable("Not supported in SIMD-only mode");
13348}
13349
13350 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13351 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13352 const Expr *Device, const RegionCodeGenTy &CodeGen,
13353 CGOpenMPRuntime::TargetDataInfo &Info) {
13354 llvm_unreachable("Not supported in SIMD-only mode");
13355}
13356
13357 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13358 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13359 const Expr *Device) {
13360 llvm_unreachable("Not supported in SIMD-only mode");
13361}
13362
13363 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13364 const OMPLoopDirective &D,
13365 ArrayRef<Expr *> NumIterations) {
13366 llvm_unreachable("Not supported in SIMD-only mode");
13367}
13368
13369 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13370 const OMPDependClause *C) {
13371 llvm_unreachable("Not supported in SIMD-only mode");
13372}
13373
13374 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13375 const OMPDoacrossClause *C) {
13376 llvm_unreachable("Not supported in SIMD-only mode");
13377}
13378
13379const VarDecl *
13380 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13381 const VarDecl *NativeParam) const {
13382 llvm_unreachable("Not supported in SIMD-only mode");
13383}
13384
13385Address
13386 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13387 const VarDecl *NativeParam,
13388 const VarDecl *TargetParam) const {
13389 llvm_unreachable("Not supported in SIMD-only mode");
13390}
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static std::pair< llvm::Value *, OMPDynGroupprivateFallbackType > emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
Expr::Classification Cl
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:220
SourceManager & getSourceManager()
Definition ASTContext.h:851
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:944
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static bool hasSameType(QualType T1, QualType T2)
Determine whether the given types T1 and T2 are equivalent.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5270
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3723
Attr - This represents one attribute.
Definition Attr.h:45
Represents a C++ constructor within a class.
Definition DeclCXX.h:2604
Represents a C++ destructor within a class.
Definition DeclCXX.h:2869
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2255
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2279
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1784
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1097
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2121
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3931
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3965
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1353
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:3971
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3959
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3962
This captures a statement into a function.
Definition Stmt.h:3918
const Capture * const_capture_iterator
Definition Stmt.h:4052
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4069
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:4039
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:4022
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1479
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4064
capture_range captures()
Definition Stmt.h:4056
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:140
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:296
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:207
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:245
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:112
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:369
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:282
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:193
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32global_tid, kmp_int32 num_threads) ...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of weather a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of weather a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /‍**< pointer to block of pointers to shared vars ‍/ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of weather a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc)
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outilined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit a code for initialization of threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits a masked region.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit a code for initialization of task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outilined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32global_tid, kmp_int32 num_threads) ...
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
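A sketch of the typical use of this scope during region emission, assuming CGF, PrivateVD, and PrivateAddr are already in hand:
// Remap PrivateVD to its privatized storage for the duration of the body.
CodeGenFunction::OMPPrivateScope PrivScope(CGF);
PrivScope.addPrivate(PrivateVD, PrivateAddr); // register the private copy
(void)PrivScope.Privatize();                  // activate all registered remappings
// ... emit the region body; references to PrivateVD now use PrivateAddr ...
// The remapping is undone when PrivScope is destroyed.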
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3207
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
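These block primitives combine into the usual guarded-emission pattern for an 'if' clause; a sketch with illustrative block names:
llvm::BasicBlock *ThenBB = CGF.createBasicBlock("omp_if.then");
llvm::BasicBlock *ContBB = CGF.createBasicBlock("omp_if.end");
// Branch on the clause condition, folding trivial constants where possible.
CGF.EmitBranchOnBoolExpr(IfCond, ThenBB, ContBB, /*TrueCount=*/0);
CGF.EmitBlock(ThenBB);
// ... emit the guarded region ...
CGF.EmitBranch(ContBB);
CGF.EmitBlock(ContBB, /*IsFinished=*/true);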
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2394
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1482
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2278
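Together with QualType::isDestructedType (listed further down), this yields the common pattern for scheduling destruction of a temporary; a minimal sketch, assuming Ty and TempAddr are in scope:
// Schedule the standard destructor for Ty, but only if it needs one;
// the cleanup runs when the enclosing scope exits.
if (QualType::DestructionKind DtorKind = Ty.isDestructedType())
  CGF.pushDestroy(DtorKind, TempAddr, Ty);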
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3216
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2251
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5472
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:177
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:245
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2402
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:5047
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:226
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element-by-element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns the calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5646
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2202
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2624
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3226
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:296
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1575
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:676
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:189
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:203
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1633
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitMemberExpr(const MemberExpr *E)
Definition CGExpr.cpp:5282
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1691
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1807
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
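Paired with EmitLoadOfScalar above, this is the canonical scalar copy between two addresses; a sketch with illustrative names (SrcAddr, DestAddr, Ty, Loc):
// Copy one scalar of type Ty from SrcAddr to DestAddr, performing the
// proper memory<->scalar representation conversions on both sides.
llvm::Value *V = CGF.EmitLoadOfScalar(SrcAddr, /*Volatile=*/false, Ty, Loc);
CGF.EmitStoreOfScalar(V, DestAddr, /*Volatile=*/false, Ty);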
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:656
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2092
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2944
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition CGCall.cpp:1702
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:740
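These two hooks, plus StartFunction/FinishFunction above, form the skeleton used to manufacture internal helper functions. A condensed sketch under the assumption that CGM is the current CodeGenModule and the helper takes no meaningful arguments (the FunctionArgList contents and the helper name are illustrative):
ASTContext &C = CGM.getContext();
FunctionArgList Args; // parameters of the helper, elided here
const CGFunctionInfo &FnInfo =
    CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                  ".omp.helper.", &CGM.getModule());
CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
// ... emit the helper body ...
CGF.FinishFunction();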
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:639
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375
LValue - This represents an lvalue reference.
Definition CGValue.h:183
CharUnits getAlignment() const
Definition CGValue.h:349
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:344
Address getAddress() const
Definition CGValue.h:367
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:352
QualType getType() const
Definition CGValue.h:297
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:341
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:99
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:109
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:72
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the reference expression of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used instead of the original variable address in normal operations.
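A sketch of the per-item driver loop this class is built for, assuming the four expression arrays come from the reduction clause and PrivAddr is the freshly allocated private copy for item N:
ReductionCodeGen RedCG(Shareds, Origs, Privates, ReductionOps);
for (unsigned N = 0, E = Privates.size(); N < E; ++N) {
  RedCG.emitSharedOrigLValue(CGF, N); // materialize shared/original lvalues
  RedCG.emitAggregateType(CGF, N);    // compute sizes for VLA-typed items
  // ... allocate the private copy at PrivAddr ...
  RedCG.emitInitialization(CGF, N, PrivAddr,
                           RedCG.getSharedLValue(N).getAddress(),
                           [](CodeGenFunction &) { return false; });
  if (RedCG.needCleanups(N))
    RedCG.emitCleanups(CGF, N, PrivAddr);
}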
Class providing a way to call the simple version of codegen for an OpenMP region, or an advanced one with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
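A sketch of how a region body is packaged and run through this wrapper, with PrePostActionTy (above) bracketing the emission; SomeAction is an illustrative name:
// Package the body as a callback; the action lets callers inject code
// before/after the region without touching the body itself.
auto &&BodyGen = [](CodeGenFunction &CGF, PrePostActionTy &Action) {
  Action.Enter(CGF); // run the pre-action (a no-op by default)
  // ... emit the region body ...
};
RegionCodeGenTy RCG(BodyGen);
RCG.setAction(SomeAction); // optional advanced mode
RCG(CGF);                  // perform the code generation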
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1449
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1270
ValueDecl * getDecl()
Definition Expr.h:1338
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:573
bool hasAttrs() const
Definition DeclBase.h:518
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:546
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition DeclBase.h:1087
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:559
SourceLocation getLocation() const
Definition DeclBase.h:439
DeclContext * getDeclContext()
Definition DeclBase.h:448
AttrVec & getAttrs()
Definition DeclBase.h:524
bool hasAttr() const
Definition DeclBase.h:577
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:978
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition Diagnostic.h:905
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
Expr * IgnoreParenNoopCasts(const ASTContext &Ctx) LLVM_READONLY
Skip past any parentheses and casts which do not change the value (including ptr->int casts of the sa...
Definition Expr.cpp:3116
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:674
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:672
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3094
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3089
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
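Clause codegen commonly tries this constant path before falling back to runtime evaluation; a minimal sketch for a hypothetical NumThreads clause expression:
// Prefer a compile-time constant when the frontend can fold the clause
// expression; otherwise evaluate it at run time.
llvm::Value *NT;
if (std::optional<llvm::APSInt> V =
        NumThreads->getIntegerConstantExpr(CGF.getContext()))
  NT = llvm::ConstantInt::get(CGF.Int32Ty, V->getExtValue());
else
  NT = CGF.EmitScalarExpr(NumThreads);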
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3669
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:276
static bool isSameComparisonOperand(const Expr *E1, const Expr *E2)
Checks that the two Expr's will refer to the same value as a comparison operand.
Definition Expr.cpp:4299
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4035
Represents a member of a struct/union/class.
Definition Decl.h:3160
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition Decl.h:3245
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3396
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4696
Represents a function declaration or definition.
Definition Decl.h:2000
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2797
QualType getReturnType() const
Definition Decl.h:2845
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2774
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3743
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3822
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5536
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:974
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3364
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3447
Expr * getBase() const
Definition Expr.h:3441
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
bool isExternallyVisible() const
Definition Decl.h:433
This represents clause 'affinity' in the 'pragma omp task'-based directives.
ArrayRef< MappableComponent > MappableExprComponentListRef
static std::pair< const Expr *, std::optional< size_t > > findAttachPtrExpr(MappableExprComponentListRef Components, OpenMPDirectiveKind CurDirKind)
Find the attach pointer expression from a list of mappable expression components.
static QualType getComponentExprElementType(const Expr *Exp)
Get the type of an element of a ComponentList Expr Exp.
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing single OpenMP clause.
This represents 'pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents 'pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
This represents implicit clause 'depend' for the 'pragma omp task' directive.
This represents 'detach' clause in the 'pragma omp task' directive.
This represents 'device' clause in the 'pragma omp ...' directive.
This represents the 'doacross' clause for the 'pragma omp ordered' directive.
This represents 'dyn_groupprivate' clause in 'pragma omp target ...' and 'pragma omp teams ....
This represents 'if' clause in the 'pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
This represents clause 'in_reduction' in the 'pragma omp task' directives.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5478
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents clause 'lastprivate' in the 'pragma omp ...' directives.
This represents clause 'nontemporal' in the 'pragma omp ...' directives.
This represents 'nowait' clause in the 'pragma omp ...' directive.
This represents 'num_teams' clause in the 'pragma omp ...' directive.
This represents 'num_threads' clause in the 'pragma omp ...' directive.
This represents 'ordered' clause in the 'pragma omp ...' directive.
This represents clause 'private' in the 'pragma omp ...' directives.
This represents 'pragma omp requires...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents 'thread_limit' clause in the 'pragma omp ...' directive.
This represents 'threadset' clause in the 'pragma omp task ...' directive.
This represents clause 'uses_allocators' in the 'pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the 'pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the 'pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1178
Represents a parameter to a function.
Definition Decl.h:1790
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3329
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1172
QualType withRestrict() const
Definition TypeBase.h:1175
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8293
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8333
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8478
QualType getCanonicalType() const
Definition TypeBase.h:8345
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1545
Represents a struct/union/class.
Definition Decl.h:4321
field_iterator field_end() const
Definition Decl.h:4527
field_range fields() const
Definition Decl.h:4524
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5225
bool field_empty() const
Definition Decl.h:4532
field_iterator field_begin() const
Definition Decl.cpp:5209
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
Stmt - This represents one statement.
Definition Stmt.h:85
child_range children()
Definition Stmt.cpp:299
StmtClass getStmtClass() const
Definition Stmt.h:1484
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:338
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition Stmt.cpp:205
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:350
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4902
bool isUnion() const
Definition Decl.h:3922
The base class of the type hierarchy.
Definition TypeBase.h:1833
bool isVoidType() const
Definition TypeBase.h:8892
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition TypeBase.h:9072
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2206
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8629
bool isPointerType() const
Definition TypeBase.h:8530
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8936
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9179
bool isReferenceType() const
Definition TypeBase.h:8554
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:753
bool isLValueReferenceType() const
Definition TypeBase.h:8558
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2412
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3120
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9065
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2801
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9165
bool isFloatingType() const
Definition Type.cpp:2305
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2254
bool isAnyPointerType() const
Definition TypeBase.h:8538
const T * getAs() const
Member-template getAs<specific type>.
Definition TypeBase.h:9112
bool isRecordType() const
Definition TypeBase.h:8657
bool isUnionType() const
Definition Type.cpp:719
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:926
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2264
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2373
const Expr * getInit() const
Definition Decl.h:1368
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1217
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1295
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2382
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1262
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1358
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:3967
Expr * getSizeExpr() const
Definition TypeBase.h:3981
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:146
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
ComparisonResult
Indicates the result of a tentative comparison.
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs a task-based thread_limit.
@ Match
This is not an overload because the signature exactly matches an existing declaration.
Definition Sema.h:817
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
bool isa(CodeGen::Address addr)
Definition Address.h:330
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
@ Conditional
A conditional (?:) operator.
Definition Sema.h:668
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:124
This class represents a compute construct, representing a 'Kind' of 'parallel', 'serial',...
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
const FunctionProtoType * T
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:5901
bool isOpenMPTargetMapEnteringDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a map-entering target directive.
@ Type
The name was classified as a type.
Definition Sema.h:563
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:139
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
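A sketch of querying the expansion of a combined directive (OMPD_target_parallel is one of the llvm::omp::Directive enumerators):
llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
getOpenMPCaptureRegions(CaptureRegions, OMPD_target_parallel);
// For 'target parallel' this is expected to yield two entries, one per
// nested CapturedStmt level: the 'target' region and the 'parallel' region.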
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1746
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPThreadsetKind
OpenMP modifiers for 'threadset' clause.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, necessary to generate the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
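For reference, a sketch of the libomp entry point these fields feed, using the 32-bit signed variant; the signature is a hedged reconstruction of the kmp.h declaration, not quoted from this file:
extern "C" void
__kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                         kmp_int32 *plastiter, // <- IL (last-iteration flag)
                         kmp_int32 *plower,    // <- LB (lower bound, in/out)
                         kmp_int32 *pupper,    // <- UB (upper bound, in/out)
                         kmp_int32 *pstride,   // <- ST (stride)
                         kmp_int32 incr, kmp_int32 chunk); // <- Chunk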
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:645
Extra information about a function prototype.
Definition TypeBase.h:5354
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition ExprOpenMP.h:121
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Scheduling data for loop-based OpenMP directives.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
Describes how types, statements, expressions, and declarations should be printed.