//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
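
  // Rough shape of what the action above emits for an untied task with two
  // scheduling points (an illustrative sketch, not verbatim IR):
  //
  //   switch (*part_id) {           // built in Enter()
  //   case 0: goto .untied.jmp.0;   // initial entry into the task body
  //   case 1: goto .untied.jmp.1;   // resume after the first scheduling point
  //   case 2: goto .untied.jmp.2;   // resume after the second one
  //   default: goto .untied.done.;  // leave the outlined function
  //   }
  //
  // Each emitUntiedSwitch() call stores the next case number into *part_id,
  // re-enqueues the task (UntiedCodeGen emits the __kmpc_omp_task call) and
  // branches out through the cleanup stack.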
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up in a
    // list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in the innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
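
// Typical use (an illustrative sketch, not a call site from this file): the
// RAII object swaps in the inlined region info for the lifetime of one
// construct and restores the previous state on scope exit:
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CGF.EmitStmt(Body); // emitted with the inlined region info installed
//   } // original CapturedStmtInfo (and lambda/block state) restored here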

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeration elements are named and described in accordance with the
/// code from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
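
// Example (a sketch): the implicit barrier at the end of a worksharing 'for'
// is described by combining the c-style marker with the barrier kind:
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR == 0x02 | 0x40 == 0x42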

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /* but currently used for storing
///                               region-specific ITT */
///                            /* contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct. */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
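
// Worked example (a sketch): '#pragma omp for schedule(monotonic: dynamic, 4)'
// is encoded as the base schedule plus the modifier bit:
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_monotonic
//       == 35 | (1 << 29) == 0x20000023
// while a plain 'schedule(static)' maps to OMP_sch_static (34).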

/// A basic class for pre/post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
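
// Block structure of the loop emitted above (a sketch, not exact IR):
//
//   entry:               br (DestBegin == DestEnd) ? omp.arrayinit.done
//                                                  : omp.arrayinit.body
//   omp.arrayinit.body:  initialize one element (PHIs walk source and
//                        destination in lockstep); advance both pointers;
//                        br (Next == DestEnd) ? omp.arrayinit.done
//                                             : omp.arrayinit.body
//   omp.arrayinit.done:  fall through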

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
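
// Worked example (a sketch): for 'reduction(+: a[1:2])' the shared section
// starts at &a[1] while the private copy covers only the section elements.
// loadToBegin() yields the address of 'a' itself, so:
//   Adjustment  = &a - &a[1]                  // -1, in elements
//   PrivateBase = &private_copy + Adjustment  // so PrivateBase[1] is
//                                             // private_copy[0]
// castToBase() then rebuilds any pointer/reference indirection on top of the
// adjusted base so later subscripts of 'a' resolve into the private copy.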

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
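
// Illustrative mapping (a sketch): given
//   #pragma omp declare reduction(mymax : int :
//                                 omp_out = omp_out > omp_in ? omp_out : omp_in)
// the combiner emitted above is morally
//   void .omp_combiner.(int *restrict out, int *restrict in) {
//     *out = *out > *in ? *out : *in;
//   }
// with 'omp_out'/'omp_in' privatized to the pointees of the two parameters.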

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
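
// Example (a sketch): for a directive at test.c:10:3 inside 'void foo()' this
// produces the kmp-style location string
//   ";test.c;foo;10;3;;"
// i.e. ";file;function;line;column;;" with empty first and trailing fields.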

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
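
// Sketch of the cached IR (names illustrative):
//   %gtid = call i32 @__kmpc_global_thread_num(ptr @.ident.)
// emitted once at the service insertion point near the function entry and then
// reused by every later runtime call in the function that needs the global
// thread id.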

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
1498llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1500 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1501 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1502 if (!DevTy)
1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1504
1505 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1506 case OMPDeclareTargetDeclAttr::DT_Host:
1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1508 break;
1509 case OMPDeclareTargetDeclAttr::DT_NoHost:
1510 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1511 break;
1512 case OMPDeclareTargetDeclAttr::DT_Any:
1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1514 break;
1515 default:
1516 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1517 break;
1518 }
1519}
1520
1521llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1523 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1524 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1525 if (!MapType)
1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1527 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1528 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1530 break;
1531 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1532 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1533 break;
1534 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1535 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1536 break;
1537 default:
1538 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1539 break;
1540 }
1541}
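// E.g. (a sketch) for
// \code
//   int x, y;
//   #pragma omp declare target to(x) link(y)
// \endcode
// convertCaptureClause returns OMPTargetGlobalVarEntryTo for 'x' and
// OMPTargetGlobalVarEntryLink for 'y'.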
1542
1543static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1544 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1545 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1546
1547 auto FileInfoCallBack = [&]() {
1548 SourceManager &SM = CGM.getContext().getSourceManager();
1549 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1550
1551 llvm::sys::fs::UniqueID ID;
1552 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1553 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1554 }
1555
1556 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1557 };
1558
1559 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1560}
1561
1562ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1563 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1564
1565 auto LinkageForVariable = [&VD, this]() {
1566 return CGM.getLLVMLinkageVarDefinition(VD);
1567 };
1568
1569 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1570
1571 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1572 CGM.getContext().getPointerType(VD->getType()));
1573 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1574 convertCaptureClause(VD), convertDeviceClause(VD),
1575 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1576 VD->isExternallyVisible(),
1577 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1578 VD->getCanonicalDecl()->getBeginLoc()),
1579 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1580 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1581 LinkageForVariable);
1582
1583 if (!addr)
1584 return ConstantAddress::invalid();
1585 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1586}
1587
1588llvm::Constant *
1589CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1590 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1591 !CGM.getContext().getTargetInfo().isTLSSupported());
1592 // Lookup the entry, lazily creating it if necessary.
1593 std::string Suffix = getName({"cache", ""});
1594 return OMPBuilder.getOrCreateInternalVariable(
1595 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1596}
1597
1598Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1599 const VarDecl *VD,
1600 Address VDAddr,
1601 SourceLocation Loc) {
1602 if (CGM.getLangOpts().OpenMPUseTLS &&
1603 CGM.getContext().getTargetInfo().isTLSSupported())
1604 return VDAddr;
1605
1606 llvm::Type *VarTy = VDAddr.getElementType();
1607 llvm::Value *Args[] = {
1608 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1609 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1610 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1611 getOrCreateThreadPrivateCache(VD)};
1612 return Address(
1613 CGF.EmitRuntimeCall(
1614 OMPBuilder.getOrCreateRuntimeFunction(
1615 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1616 Args),
1617 CGF.Int8Ty, VDAddr.getAlignment());
1618}
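// E.g. (a sketch) for
// \code
//   static int counter;
//   #pragma omp threadprivate(counter)
// \endcode
// each non-TLS access is rewritten to use the pointer returned by
// \code
//   __kmpc_threadprivate_cached(&loc, gtid, &counter, sizeof(counter), cache)
// \endcode
// where 'cache' stands for the internal variable created by
// getOrCreateThreadPrivateCache above.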
1619
1620void CGOpenMPRuntime::emitThreadPrivateVarInit(
1621 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1622 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1623 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1624 // library.
1625 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1626 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1627 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1628 OMPLoc);
1629 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1630 // to register constructor/destructor for variable.
1631 llvm::Value *Args[] = {
1632 OMPLoc,
1633 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1634 Ctor, CopyCtor, Dtor};
1635 CGF.EmitRuntimeCall(
1636 OMPBuilder.getOrCreateRuntimeFunction(
1637 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1638 Args);
1639}
1640
1641llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1642 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1643 bool PerformInit, CodeGenFunction *CGF) {
1644 if (CGM.getLangOpts().OpenMPUseTLS &&
1645 CGM.getContext().getTargetInfo().isTLSSupported())
1646 return nullptr;
1647
1648 VD = VD->getDefinition(CGM.getContext());
1649 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1650 QualType ASTTy = VD->getType();
1651
1652 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1653 const Expr *Init = VD->getAnyInitializer();
1654 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1655 // Generate function that re-emits the declaration's initializer into the
1656 // threadprivate copy of the variable VD
1657 CodeGenFunction CtorCGF(CGM);
1658 FunctionArgList Args;
1659 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1660 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1661 ImplicitParamKind::Other);
1662 Args.push_back(&Dst);
1663
1664 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1665 CGM.getContext().VoidPtrTy, Args);
1666 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1667 std::string Name = getName({"__kmpc_global_ctor_", ""});
1668 llvm::Function *Fn =
1669 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1670 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1671 Args, Loc, Loc);
1672 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1673 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1674 CGM.getContext().VoidPtrTy, Dst.getLocation());
1675 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1676 VDAddr.getAlignment());
1677 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1678 /*IsInitializer=*/true);
1679 ArgVal = CtorCGF.EmitLoadOfScalar(
1680 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1681 CGM.getContext().VoidPtrTy, Dst.getLocation());
1682 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1683 CtorCGF.FinishFunction();
1684 Ctor = Fn;
1685 }
1686 if (VD->getType().isDestructedType() != QualType::DK_none) {
1687 // Generate function that emits destructor call for the threadprivate copy
1688 // of the variable VD
1689 CodeGenFunction DtorCGF(CGM);
1690 FunctionArgList Args;
1691 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1692 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1693 ImplicitParamKind::Other);
1694 Args.push_back(&Dst);
1695
1696 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1697 CGM.getContext().VoidTy, Args);
1698 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1699 std::string Name = getName({"__kmpc_global_dtor_", ""});
1700 llvm::Function *Fn =
1701 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1702 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1703 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1704 Loc, Loc);
1705 // Create a scope with an artificial location for the body of this function.
1706 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1707 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1708 DtorCGF.GetAddrOfLocalVar(&Dst),
1709 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1710 DtorCGF.emitDestroy(
1711 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1712 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1713 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1714 DtorCGF.FinishFunction();
1715 Dtor = Fn;
1716 }
1717 // Do not emit init function if it is not required.
1718 if (!Ctor && !Dtor)
1719 return nullptr;
1720
1721 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1722 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1723 /*isVarArg=*/false)
1724 ->getPointerTo();
1725 // Copying constructor for the threadprivate variable.
1726 // Must be NULL: this parameter is reserved by the runtime, which
1727 // currently requires it to be NULL and asserts otherwise.
1728 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1729 if (Ctor == nullptr) {
1730 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1731 /*isVarArg=*/false)
1732 ->getPointerTo();
1733 Ctor = llvm::Constant::getNullValue(CtorTy);
1734 }
1735 if (Dtor == nullptr) {
1736 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1737 /*isVarArg=*/false)
1738 ->getPointerTo();
1739 Dtor = llvm::Constant::getNullValue(DtorTy);
1740 }
1741 if (!CGF) {
1742 auto *InitFunctionTy =
1743 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1744 std::string Name = getName({"__omp_threadprivate_init_", ""});
1745 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1746 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1747 CodeGenFunction InitCGF(CGM);
1748 FunctionArgList ArgList;
1749 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1750 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1751 Loc, Loc);
1752 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1753 InitCGF.FinishFunction();
1754 return InitFunction;
1755 }
1756 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1757 }
1758 return nullptr;
1759}
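// When called without a CodeGenFunction, the returned initializer (a sketch)
// looks roughly like:
// \code
//   void __omp_threadprivate_init_() {
//     __kmpc_global_thread_num(&loc);
//     __kmpc_threadprivate_register(&loc, &var, ctor, /*cctor=*/NULL, dtor);
//   }
// \endcode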
1760
1761void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1762 llvm::GlobalValue *GV) {
1763 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1764 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1765
1766 // We only need to handle active 'indirect' declare target functions.
1767 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1768 return;
1769
1770 // Get a mangled name to store the new device global in.
1771 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1772 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1773 SmallString<128> Name;
1774 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1775
1776 // We need to generate a new global to hold the address of the indirectly
1777 // called device function. Doing this allows us to keep the visibility and
1778 // linkage of the associated function unchanged while allowing the runtime to
1779 // access its value.
1780 llvm::GlobalValue *Addr = GV;
1781 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1782 Addr = new llvm::GlobalVariable(
1783 CGM.getModule(), CGM.VoidPtrTy,
1784 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1785 nullptr, llvm::GlobalValue::NotThreadLocal,
1786 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1787 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1788 }
1789
1790 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1791 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1792 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1793 llvm::GlobalValue::WeakODRLinkage);
1794}
1795
1796Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1797 QualType VarType,
1798 StringRef Name) {
1799 std::string Suffix = getName({"artificial", ""});
1800 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1801 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1802 VarLVType, Twine(Name).concat(Suffix).str());
1803 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1804 CGM.getContext().getTargetInfo().isTLSSupported()) {
1805 GAddr->setThreadLocal(/*Val=*/true);
1806 return Address(GAddr, GAddr->getValueType(),
1807 CGM.getContext().getTypeAlignInChars(VarType));
1808 }
1809 std::string CacheSuffix = getName({"cache", ""});
1810 llvm::Value *Args[] = {
1811 emitUpdateLocation(CGF, SourceLocation()),
1812 getThreadID(CGF, SourceLocation()),
1813 CGF.Builder.CreatePointerCast(GAddr, CGM.VoidPtrTy),
1814 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1815 /*isSigned=*/false),
1816 OMPBuilder.getOrCreateInternalVariable(
1817 CGM.VoidPtrPtrTy,
1818 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1819 return Address(
1820 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1821 CGF.EmitRuntimeCall(
1822 OMPBuilder.getOrCreateRuntimeFunction(
1823 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1824 Args),
1825 VarLVType->getPointerTo(/*AddrSpace=*/0)),
1826 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1827}
1828
1829void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1830 const RegionCodeGenTy &ThenGen,
1831 const RegionCodeGenTy &ElseGen) {
1832 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1833
1834 // If the condition constant folds and can be elided, try to avoid emitting
1835 // the condition and the dead arm of the if/else.
1836 bool CondConstant;
1837 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1838 if (CondConstant)
1839 ThenGen(CGF);
1840 else
1841 ElseGen(CGF);
1842 return;
1843 }
1844
1845 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1846 // emit the conditional branch.
1847 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1848 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1849 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1850 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1851
1852 // Emit the 'then' code.
1853 CGF.EmitBlock(ThenBlock);
1854 ThenGen(CGF);
1855 CGF.EmitBranch(ContBlock);
1856 // Emit the 'else' code if present.
1857 // There is no need to emit line number for unconditional branch.
1858 (void)ApplyDebugLocation::CreateEmpty(CGF);
1859 CGF.EmitBlock(ElseBlock);
1860 ElseGen(CGF);
1861 // There is no need to emit line number for unconditional branch.
1862 (void)ApplyDebugLocation::CreateEmpty(CGF);
1863 CGF.EmitBranch(ContBlock);
1864 // Emit the continuation block for code after the if.
1865 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1866}
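// E.g. (a sketch) for a non-constant 'if(n > 1)' clause, the blocks created
// above form:
// \code
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// omp_if.then:   ; ThenGen(CGF)
//   br label %omp_if.end
// omp_if.else:   ; ElseGen(CGF)
//   br label %omp_if.end
// omp_if.end:
// \endcode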
1867
1868void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1869 llvm::Function *OutlinedFn,
1870 ArrayRef<llvm::Value *> CapturedVars,
1871 const Expr *IfCond,
1872 llvm::Value *NumThreads) {
1873 if (!CGF.HaveInsertPoint())
1874 return;
1875 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1876 auto &M = CGM.getModule();
1877 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1878 this](CodeGenFunction &CGF, PrePostActionTy &) {
1879 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1880 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1881 llvm::Value *Args[] = {
1882 RTLoc,
1883 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1884 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1885 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1886 RealArgs.append(std::begin(Args), std::end(Args));
1887 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1888
1889 llvm::FunctionCallee RTLFn =
1890 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1891 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1892 };
1893 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1894 this](CodeGenFunction &CGF, PrePostActionTy &) {
1895 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1896 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1897 // Build calls:
1898 // __kmpc_serialized_parallel(&Loc, GTid);
1899 llvm::Value *Args[] = {RTLoc, ThreadID};
1900 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1901 M, OMPRTL___kmpc_serialized_parallel),
1902 Args);
1903
1904 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1905 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1906 RawAddress ZeroAddrBound =
1907 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1908 /*Name=*/".bound.zero.addr");
1909 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1910 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1911 // ThreadId for serialized parallels is 0.
1912 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1913 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1914 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1915
1916 // Ensure we do not inline the function. This is trivially true for the ones
1917 // passed to __kmpc_fork_call, but the ones called in serialized regions
1918 // could be inlined. This is not perfect, but it is closer to the invariant
1919 // we want, namely, every data environment starts with a new function.
1920 // TODO: We should pass the if condition to the runtime function and do the
1921 // handling there. Much cleaner code.
1922 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1923 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1924 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1925
1926 // __kmpc_end_serialized_parallel(&Loc, GTid);
1927 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1928 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1929 M, OMPRTL___kmpc_end_serialized_parallel),
1930 EndArgs);
1931 };
1932 if (IfCond) {
1933 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1934 } else {
1935 RegionCodeGenTy ThenRCG(ThenGen);
1936 ThenRCG(CGF);
1937 }
1938}
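// E.g. (a sketch) when the 'if' clause evaluates to false, the region is
// serialized as:
// \code
//   __kmpc_serialized_parallel(&loc, gtid);
//   outlined_fn(&gtid, &.bound.zero.addr, captured vars...);
//   __kmpc_end_serialized_parallel(&loc, gtid);
// \endcode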
1939
1940// If we're inside an (outlined) parallel region, use the region info's
1941// thread-ID variable (it is passed as the first argument of the outlined
1942// function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1943// region but in regular serial code, get the thread ID by calling kmp_int32
1944// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1945// temporary, and return the address of that temporary.
1946Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1947 SourceLocation Loc) {
1948 if (auto *OMPRegionInfo =
1949 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1950 if (OMPRegionInfo->getThreadIDVariable())
1951 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1952
1953 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1954 QualType Int32Ty =
1955 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1956 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1957 CGF.EmitStoreOfScalar(ThreadID,
1958 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1959
1960 return ThreadIDTemp;
1961}
1962
1963llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1964 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1965 std::string Name = getName({Prefix, "var"});
1966 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1967}
1968
1969namespace {
1970/// Common pre(post)-action for different OpenMP constructs.
1971class CommonActionTy final : public PrePostActionTy {
1972 llvm::FunctionCallee EnterCallee;
1973 ArrayRef<llvm::Value *> EnterArgs;
1974 llvm::FunctionCallee ExitCallee;
1975 ArrayRef<llvm::Value *> ExitArgs;
1976 bool Conditional;
1977 llvm::BasicBlock *ContBlock = nullptr;
1978
1979public:
1980 CommonActionTy(llvm::FunctionCallee EnterCallee,
1981 ArrayRef<llvm::Value *> EnterArgs,
1982 llvm::FunctionCallee ExitCallee,
1983 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1984 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1985 ExitArgs(ExitArgs), Conditional(Conditional) {}
1986 void Enter(CodeGenFunction &CGF) override {
1987 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1988 if (Conditional) {
1989 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1990 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1991 ContBlock = CGF.createBasicBlock("omp_if.end");
1992 // Generate the branch (If-stmt)
1993 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1994 CGF.EmitBlock(ThenBlock);
1995 }
1996 }
1997 void Done(CodeGenFunction &CGF) {
1998 // Emit the rest of blocks/branches
1999 CGF.EmitBranch(ContBlock);
2000 CGF.EmitBlock(ContBlock, true);
2001 }
2002 void Exit(CodeGenFunction &CGF) override {
2003 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2004 }
2005};
2006} // anonymous namespace
2007
2008void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2009 StringRef CriticalName,
2010 const RegionCodeGenTy &CriticalOpGen,
2011 SourceLocation Loc, const Expr *Hint) {
2012 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2013 // CriticalOpGen();
2014 // __kmpc_end_critical(ident_t *, gtid, Lock);
2015 // Prepare arguments and build a call to __kmpc_critical
2016 if (!CGF.HaveInsertPoint())
2017 return;
2018 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2019 getCriticalRegionLock(CriticalName)};
2020 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2021 std::end(Args));
2022 if (Hint) {
2023 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2024 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2025 }
2026 CommonActionTy Action(
2027 OMPBuilder.getOrCreateRuntimeFunction(
2028 CGM.getModule(),
2029 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2030 EnterArgs,
2031 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2032 OMPRTL___kmpc_end_critical),
2033 Args);
2034 CriticalOpGen.setAction(Action);
2035 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2036}
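// E.g. (a sketch) '#pragma omp critical (L) { body(); }' lowers to:
// \code
//   __kmpc_critical(&loc, gtid, &.gomp_critical_user_L.var);
//   body();
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_L.var);
// \endcode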
2037
2038void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2039 const RegionCodeGenTy &MasterOpGen,
2040 SourceLocation Loc) {
2041 if (!CGF.HaveInsertPoint())
2042 return;
2043 // if(__kmpc_master(ident_t *, gtid)) {
2044 // MasterOpGen();
2045 // __kmpc_end_master(ident_t *, gtid);
2046 // }
2047 // Prepare arguments and build a call to __kmpc_master
2048 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2049 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2050 CGM.getModule(), OMPRTL___kmpc_master),
2051 Args,
2052 OMPBuilder.getOrCreateRuntimeFunction(
2053 CGM.getModule(), OMPRTL___kmpc_end_master),
2054 Args,
2055 /*Conditional=*/true);
2056 MasterOpGen.setAction(Action);
2057 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2058 Action.Done(CGF);
2059}
2060
2061void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2062 const RegionCodeGenTy &MaskedOpGen,
2063 SourceLocation Loc, const Expr *Filter) {
2064 if (!CGF.HaveInsertPoint())
2065 return;
2066 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2067 // MaskedOpGen();
2068 // __kmpc_end_masked(ident_t *, gtid);
2069 // }
2070 // Prepare arguments and build a call to __kmpc_masked
2071 llvm::Value *FilterVal = Filter
2072 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2073 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2074 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2075 FilterVal};
2076 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2077 getThreadID(CGF, Loc)};
2078 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2079 CGM.getModule(), OMPRTL___kmpc_masked),
2080 Args,
2081 OMPBuilder.getOrCreateRuntimeFunction(
2082 CGM.getModule(), OMPRTL___kmpc_end_masked),
2083 ArgsEnd,
2084 /*Conditional=*/true);
2085 MaskedOpGen.setAction(Action);
2086 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2087 Action.Done(CGF);
2088}
2089
2090void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2091 SourceLocation Loc) {
2092 if (!CGF.HaveInsertPoint())
2093 return;
2094 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2095 OMPBuilder.createTaskyield(CGF.Builder);
2096 } else {
2097 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2098 llvm::Value *Args[] = {
2099 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2100 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2101 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2102 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2103 Args);
2104 }
2105
2106 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2107 Region->emitUntiedSwitch(CGF);
2108}
2109
2110void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2111 const RegionCodeGenTy &TaskgroupOpGen,
2112 SourceLocation Loc) {
2113 if (!CGF.HaveInsertPoint())
2114 return;
2115 // __kmpc_taskgroup(ident_t *, gtid);
2116 // TaskgroupOpGen();
2117 // __kmpc_end_taskgroup(ident_t *, gtid);
2118 // Prepare arguments and build a call to __kmpc_taskgroup
2119 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2120 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2121 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2122 Args,
2123 OMPBuilder.getOrCreateRuntimeFunction(
2124 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2125 Args);
2126 TaskgroupOpGen.setAction(Action);
2127 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2128}
2129
2130/// Given an array of pointers to variables, project the address of a
2131/// given variable.
2132static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2133 unsigned Index, const VarDecl *Var) {
2134 // Pull out the pointer to the variable.
2135 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2136 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2137
2138 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2139 return Address(
2140 CGF.Builder.CreateBitCast(
2141 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2142 ElemTy, CGF.getContext().getDeclAlign(Var));
2143}
2144
2145static llvm::Value *emitCopyprivateCopyFunction(
2146 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2147 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2148 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2149 SourceLocation Loc) {
2150 ASTContext &C = CGM.getContext();
2151 // void copy_func(void *LHSArg, void *RHSArg);
2152 FunctionArgList Args;
2153 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2154 ImplicitParamKind::Other);
2155 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2156 ImplicitParamKind::Other);
2157 Args.push_back(&LHSArg);
2158 Args.push_back(&RHSArg);
2159 const auto &CGFI =
2160 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2161 std::string Name =
2162 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2163 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2164 llvm::GlobalValue::InternalLinkage, Name,
2165 &CGM.getModule());
2166 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2167 Fn->setDoesNotRecurse();
2168 CodeGenFunction CGF(CGM);
2169 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2170 // Dest = (void*[n])(LHSArg);
2171 // Src = (void*[n])(RHSArg);
2172 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2173 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2174 ArgsElemType->getPointerTo()),
2175 ArgsElemType, CGF.getPointerAlign());
2176 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2177 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2178 ArgsElemType->getPointerTo()),
2179 ArgsElemType, CGF.getPointerAlign());
2180 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2181 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2182 // ...
2183 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2184 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2185 const auto *DestVar =
2186 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2187 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2188
2189 const auto *SrcVar =
2190 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2191 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2192
2193 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2194 QualType Type = VD->getType();
2195 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2196 }
2197 CGF.FinishFunction();
2198 return Fn;
2199}
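// E.g. (a sketch) for 'copyprivate(a, b)' the generated helper is roughly:
// \code
//   void .omp.copyprivate.copy_func(void *dst[2], void *src[2]) {
//     *(TyA *)dst[0] = *(TyA *)src[0]; // a
//     *(TyB *)dst[1] = *(TyB *)src[1]; // b
//   }
// \endcode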
2200
2201void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2202 const RegionCodeGenTy &SingleOpGen,
2203 SourceLocation Loc,
2204 ArrayRef<const Expr *> CopyprivateVars,
2205 ArrayRef<const Expr *> SrcExprs,
2206 ArrayRef<const Expr *> DstExprs,
2207 ArrayRef<const Expr *> AssignmentOps) {
2208 if (!CGF.HaveInsertPoint())
2209 return;
2210 assert(CopyprivateVars.size() == SrcExprs.size() &&
2211 CopyprivateVars.size() == DstExprs.size() &&
2212 CopyprivateVars.size() == AssignmentOps.size());
2213 ASTContext &C = CGM.getContext();
2214 // int32 did_it = 0;
2215 // if(__kmpc_single(ident_t *, gtid)) {
2216 // SingleOpGen();
2217 // __kmpc_end_single(ident_t *, gtid);
2218 // did_it = 1;
2219 // }
2220 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2221 // <copy_func>, did_it);
2222
2223 Address DidIt = Address::invalid();
2224 if (!CopyprivateVars.empty()) {
2225 // int32 did_it = 0;
2226 QualType KmpInt32Ty =
2227 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2228 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2229 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2230 }
2231 // Prepare arguments and build a call to __kmpc_single
2232 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2233 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2234 CGM.getModule(), OMPRTL___kmpc_single),
2235 Args,
2236 OMPBuilder.getOrCreateRuntimeFunction(
2237 CGM.getModule(), OMPRTL___kmpc_end_single),
2238 Args,
2239 /*Conditional=*/true);
2240 SingleOpGen.setAction(Action);
2241 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2242 if (DidIt.isValid()) {
2243 // did_it = 1;
2244 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2245 }
2246 Action.Done(CGF);
2247 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2248 // <copy_func>, did_it);
2249 if (DidIt.isValid()) {
2250 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2251 QualType CopyprivateArrayTy = C.getConstantArrayType(
2252 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2253 /*IndexTypeQuals=*/0);
2254 // Create a list of all private variables for copyprivate.
2255 Address CopyprivateList =
2256 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2257 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2258 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2259 CGF.Builder.CreateStore(
2260 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2261 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2262 CGF.VoidPtrTy),
2263 Elem);
2264 }
2265 // Build a function that copies private values from the single region to
2266 // all other threads in the corresponding parallel region.
2267 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2268 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2269 SrcExprs, DstExprs, AssignmentOps, Loc);
2270 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2271 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2272 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2273 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2274 llvm::Value *Args[] = {
2275 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2276 getThreadID(CGF, Loc), // i32 <gtid>
2277 BufSize, // size_t <buf_size>
2278 CL.emitRawPointer(CGF), // void *<copyprivate list>
2279 CpyFn, // void (*) (void *, void *) <copy_func>
2280 DidItVal // i32 did_it
2281 };
2282 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2283 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2284 Args);
2285 }
2286}
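// E.g. (a sketch) '#pragma omp single copyprivate(x)' lowers to:
// \code
//   did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     body();
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, size, &list, copy_func, did_it);
// \endcode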
2287
2288void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2289 const RegionCodeGenTy &OrderedOpGen,
2290 SourceLocation Loc, bool IsThreads) {
2291 if (!CGF.HaveInsertPoint())
2292 return;
2293 // __kmpc_ordered(ident_t *, gtid);
2294 // OrderedOpGen();
2295 // __kmpc_end_ordered(ident_t *, gtid);
2296 // Prepare arguments and build a call to __kmpc_ordered
2297 if (IsThreads) {
2298 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2299 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2300 CGM.getModule(), OMPRTL___kmpc_ordered),
2301 Args,
2302 OMPBuilder.getOrCreateRuntimeFunction(
2303 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2304 Args);
2305 OrderedOpGen.setAction(Action);
2306 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2307 return;
2308 }
2309 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2310}
2311
2312static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2313 unsigned Flags;
2314 if (Kind == OMPD_for)
2315 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2316 else if (Kind == OMPD_sections)
2317 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2318 else if (Kind == OMPD_single)
2319 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2320 else if (Kind == OMPD_barrier)
2321 Flags = OMP_IDENT_BARRIER_EXPL;
2322 else
2323 Flags = OMP_IDENT_BARRIER_IMPL;
2324 return Flags;
2325}
2326
2327void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2328 CodeGenFunction &CGF, const OMPLoopDirective &S,
2329 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2330 // Check if the loop directive is actually a doacross loop directive. In this
2331 // case, choose schedule(static, 1).
2332 if (llvm::any_of(
2333 S.getClausesOfKind<OMPOrderedClause>(),
2334 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2335 ScheduleKind = OMPC_SCHEDULE_static;
2336 // Chunk size is 1 in this case.
2337 llvm::APInt ChunkSize(32, 1);
2338 ChunkExpr = IntegerLiteral::Create(
2339 CGF.getContext(), ChunkSize,
2340 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2341 SourceLocation());
2342 }
2343}
2344
2345void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2346 OpenMPDirectiveKind Kind, bool EmitChecks,
2347 bool ForceSimpleCall) {
2348 // Check if we should use the OMPBuilder
2349 auto *OMPRegionInfo =
2350 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2351 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2352 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2353 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2354 return;
2355 }
2356
2357 if (!CGF.HaveInsertPoint())
2358 return;
2359 // Build call __kmpc_cancel_barrier(loc, thread_id);
2360 // Build call __kmpc_barrier(loc, thread_id);
2361 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2362 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2363 // thread_id);
2364 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2365 getThreadID(CGF, Loc)};
2366 if (OMPRegionInfo) {
2367 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2368 llvm::Value *Result = CGF.EmitRuntimeCall(
2369 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2370 OMPRTL___kmpc_cancel_barrier),
2371 Args);
2372 if (EmitChecks) {
2373 // if (__kmpc_cancel_barrier()) {
2374 // exit from construct;
2375 // }
2376 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2377 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2378 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2379 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2380 CGF.EmitBlock(ExitBB);
2381 // exit from construct;
2382 CodeGenFunction::JumpDest CancelDestination =
2383 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2384 CGF.EmitBranchThroughCleanup(CancelDestination);
2385 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2386 }
2387 return;
2388 }
2389 }
2390 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2391 CGM.getModule(), OMPRTL___kmpc_barrier),
2392 Args);
2393}
2394
2395void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2396 Expr *ME, bool IsFatal) {
2397 llvm::Value *MVL =
2398 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2399 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2400 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2401 // *message)
2402 llvm::Value *Args[] = {
2403 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2404 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2405 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2406 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2407 CGM.getModule(), OMPRTL___kmpc_error),
2408 Args);
2409}
2410
2411/// Map the OpenMP loop schedule to the runtime enumeration.
2412static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2413 bool Chunked, bool Ordered) {
2414 switch (ScheduleKind) {
2415 case OMPC_SCHEDULE_static:
2416 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2417 : (Ordered ? OMP_ord_static : OMP_sch_static);
2418 case OMPC_SCHEDULE_dynamic:
2419 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2420 case OMPC_SCHEDULE_guided:
2421 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2422 case OMPC_SCHEDULE_runtime:
2423 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2424 case OMPC_SCHEDULE_auto:
2425 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2426 case OMPC_SCHEDULE_unknown:
2427 assert(!Chunked && "chunk was specified but schedule kind not known");
2428 return Ordered ? OMP_ord_static : OMP_sch_static;
2429 }
2430 llvm_unreachable("Unexpected runtime schedule");
2431}
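// E.g. (a sketch) 'schedule(dynamic, 4)' maps to OMP_sch_dynamic_chunked,
// and 'schedule(static)' on an ordered loop maps to OMP_ord_static.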
2432
2433/// Map the OpenMP distribute schedule to the runtime enumeration.
2434static OpenMPSchedType
2435getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2436 // only static is allowed for dist_schedule
2437 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2438}
2439
2440bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2441 bool Chunked) const {
2442 OpenMPSchedType Schedule =
2443 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2444 return Schedule == OMP_sch_static;
2445}
2446
2447bool CGOpenMPRuntime::isStaticNonchunked(
2448 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2449 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2450 return Schedule == OMP_dist_sch_static;
2451}
2452
2453bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2454 bool Chunked) const {
2455 OpenMPSchedType Schedule =
2456 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2457 return Schedule == OMP_sch_static_chunked;
2458}
2459
2460bool CGOpenMPRuntime::isStaticChunked(
2461 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2462 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2463 return Schedule == OMP_dist_sch_static_chunked;
2464}
2465
2466bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2467 OpenMPSchedType Schedule =
2468 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2469 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2470 return Schedule != OMP_sch_static;
2471}
2472
2473static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2474 OpenMPScheduleClauseModifier M1,
2475 OpenMPScheduleClauseModifier M2) {
2476 int Modifier = 0;
2477 switch (M1) {
2478 case OMPC_SCHEDULE_MODIFIER_monotonic:
2479 Modifier = OMP_sch_modifier_monotonic;
2480 break;
2481 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2482 Modifier = OMP_sch_modifier_nonmonotonic;
2483 break;
2484 case OMPC_SCHEDULE_MODIFIER_simd:
2485 if (Schedule == OMP_sch_static_chunked)
2486 Schedule = OMP_sch_static_balanced_chunked;
2487 break;
2488 case OMPC_SCHEDULE_MODIFIER_last:
2489 case OMPC_SCHEDULE_MODIFIER_unknown:
2490 break;
2491 }
2492 switch (M2) {
2493 case OMPC_SCHEDULE_MODIFIER_monotonic:
2494 Modifier = OMP_sch_modifier_monotonic;
2495 break;
2496 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2497 Modifier = OMP_sch_modifier_nonmonotonic;
2498 break;
2499 case OMPC_SCHEDULE_MODIFIER_simd:
2500 if (Schedule == OMP_sch_static_chunked)
2501 Schedule = OMP_sch_static_balanced_chunked;
2502 break;
2503 case OMPC_SCHEDULE_MODIFIER_last:
2504 case OMPC_SCHEDULE_MODIFIER_unknown:
2505 break;
2506 }
2507 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2508 // If the static schedule kind is specified or if the ordered clause is
2509 // specified, and if the nonmonotonic modifier is not specified, the effect is
2510 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2511 // modifier is specified, the effect is as if the nonmonotonic modifier is
2512 // specified.
2513 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2514 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2515 Schedule == OMP_sch_static_balanced_chunked ||
2516 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2517 Schedule == OMP_dist_sch_static_chunked ||
2518 Schedule == OMP_dist_sch_static))
2519 Modifier = OMP_sch_modifier_nonmonotonic;
2520 }
2521 return Schedule | Modifier;
2522}
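// E.g. (a sketch) under OpenMP >= 5.0, plain 'schedule(dynamic)' yields
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, because dynamic
// is not one of the static schedules exempted above.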
2523
2524void CGOpenMPRuntime::emitForDispatchInit(
2525 CodeGenFunction &CGF, SourceLocation Loc,
2526 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2527 bool Ordered, const DispatchRTInput &DispatchValues) {
2528 if (!CGF.HaveInsertPoint())
2529 return;
2530 OpenMPSchedType Schedule = getRuntimeSchedule(
2531 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2532 assert(Ordered ||
2533 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2534 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2535 Schedule != OMP_sch_static_balanced_chunked));
2536 // Call __kmpc_dispatch_init(
2537 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2538 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2539 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2540
2541 // If the chunk was not specified in the clause, use the default value 1.
2542 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2543 : CGF.Builder.getIntN(IVSize, 1);
2544 llvm::Value *Args[] = {
2545 emitUpdateLocation(CGF, Loc),
2546 getThreadID(CGF, Loc),
2547 CGF.Builder.getInt32(addMonoNonMonoModifier(
2548 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2549 DispatchValues.LB, // Lower
2550 DispatchValues.UB, // Upper
2551 CGF.Builder.getIntN(IVSize, 1), // Stride
2552 Chunk // Chunk
2553 };
2554 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2555 Args);
2556}
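// E.g. (a sketch) for a 32-bit IV loop with 'schedule(dynamic, 4)' over
// [0, N), this emits roughly:
// \code
//   __kmpc_dispatch_init_4(&loc, gtid, schedule, /*lb=*/0, /*ub=*/N-1,
//                          /*stride=*/1, /*chunk=*/4);
// \endcode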
2557
2558void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2559 SourceLocation Loc) {
2560 if (!CGF.HaveInsertPoint())
2561 return;
2562 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2563 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2564 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2565}
2566
2567static void emitForStaticInitCall(
2568 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2569 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2570 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2571 const CGOpenMPRuntime::StaticRTInput &Values) {
2572 if (!CGF.HaveInsertPoint())
2573 return;
2574
2575 assert(!Values.Ordered);
2576 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2577 Schedule == OMP_sch_static_balanced_chunked ||
2578 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2579 Schedule == OMP_dist_sch_static ||
2580 Schedule == OMP_dist_sch_static_chunked);
2581
2582 // Call __kmpc_for_static_init(
2583 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2584 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2585 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2586 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2587 llvm::Value *Chunk = Values.Chunk;
2588 if (Chunk == nullptr) {
2589 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2590 Schedule == OMP_dist_sch_static) &&
2591 "expected static non-chunked schedule");
2592 // If the chunk was not specified in the clause, use the default value 1.
2593 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2594 } else {
2595 assert((Schedule == OMP_sch_static_chunked ||
2596 Schedule == OMP_sch_static_balanced_chunked ||
2597 Schedule == OMP_ord_static_chunked ||
2598 Schedule == OMP_dist_sch_static_chunked) &&
2599 "expected static chunked schedule");
2600 }
2601 llvm::Value *Args[] = {
2602 UpdateLocation,
2603 ThreadId,
2604 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2605 M2)), // Schedule type
2606 Values.IL.emitRawPointer(CGF), // &isLastIter
2607 Values.LB.emitRawPointer(CGF), // &LB
2608 Values.UB.emitRawPointer(CGF), // &UB
2609 Values.ST.emitRawPointer(CGF), // &Stride
2610 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2611 Chunk // Chunk
2612 };
2613 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2614}
2615
2616void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2617 SourceLocation Loc,
2618 OpenMPDirectiveKind DKind,
2619 const OpenMPScheduleTy &ScheduleKind,
2620 const StaticRTInput &Values) {
2621 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2622 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2623 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2624 "Expected loop-based or sections-based directive.");
2625 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2626 isOpenMPLoopDirective(DKind)
2627 ? OMP_IDENT_WORK_LOOP
2628 : OMP_IDENT_WORK_SECTIONS);
2629 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2630 llvm::FunctionCallee StaticInitFunction =
2631 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2632 false);
2633 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2634 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2635 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2636}
2637
2638void CGOpenMPRuntime::emitDistributeStaticInit(
2639 CodeGenFunction &CGF, SourceLocation Loc,
2640 OpenMPDistScheduleClauseKind SchedKind,
2641 const CGOpenMPRuntime::StaticRTInput &Values) {
2642 OpenMPSchedType ScheduleNum =
2643 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2644 llvm::Value *UpdatedLocation =
2645 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2646 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2647 llvm::FunctionCallee StaticInitFunction;
2648 bool isGPUDistribute =
2649 CGM.getLangOpts().OpenMPIsTargetDevice &&
2650 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2651 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2652 Values.IVSize, Values.IVSigned, isGPUDistribute);
2653
2654 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2655 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2656 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2657}
2658
2659void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2660 SourceLocation Loc,
2661 OpenMPDirectiveKind DKind) {
2662 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2663 DKind == OMPD_sections) &&
2664 "Expected distribute, for, or sections directive kind");
2665 if (!CGF.HaveInsertPoint())
2666 return;
2667 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2668 llvm::Value *Args[] = {
2669 emitUpdateLocation(CGF, Loc,
2670 isOpenMPDistributeDirective(DKind) ||
2671 (DKind == OMPD_target_teams_loop)
2672 ? OMP_IDENT_WORK_DISTRIBUTE
2673 : isOpenMPLoopDirective(DKind)
2674 ? OMP_IDENT_WORK_LOOP
2675 : OMP_IDENT_WORK_SECTIONS),
2676 getThreadID(CGF, Loc)};
2677 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2678 if (isOpenMPDistributeDirective(DKind) &&
2679 CGM.getLangOpts().OpenMPIsTargetDevice &&
2680 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2681 CGF.EmitRuntimeCall(
2682 OMPBuilder.getOrCreateRuntimeFunction(
2683 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2684 Args);
2685 else
2686 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2687 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2688 Args);
2689}
2690
2691void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2692 SourceLocation Loc,
2693 unsigned IVSize,
2694 bool IVSigned) {
2695 if (!CGF.HaveInsertPoint())
2696 return;
2697 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2698 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2699 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2700 Args);
2701}
2702
2703llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2704 SourceLocation Loc, unsigned IVSize,
2705 bool IVSigned, Address IL,
2706 Address LB, Address UB,
2707 Address ST) {
2708 // Call __kmpc_dispatch_next(
2709 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2710 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2711 // kmp_int[32|64] *p_stride);
2712 llvm::Value *Args[] = {
2713 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2714 IL.emitRawPointer(CGF), // &isLastIter
2715 LB.emitRawPointer(CGF), // &Lower
2716 UB.emitRawPointer(CGF), // &Upper
2717 ST.emitRawPointer(CGF) // &Stride
2718 };
2719 llvm::Value *Call = CGF.EmitRuntimeCall(
2720 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2721 return CGF.EmitScalarConversion(
2722 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2723 CGF.getContext().BoolTy, Loc);
2724}
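// E.g. (a sketch) callers drive a dynamically scheduled loop as:
// \code
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
//     for (i = lb; i <= ub; ++i)
//       body(i);
//   }
// \endcode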
2725
2726void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2727 llvm::Value *NumThreads,
2728 SourceLocation Loc) {
2729 if (!CGF.HaveInsertPoint())
2730 return;
2731 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2732 llvm::Value *Args[] = {
2733 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2734 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2735 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2736 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2737 Args);
2738}
2739
2740void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2741 ProcBindKind ProcBind,
2742 SourceLocation Loc) {
2743 if (!CGF.HaveInsertPoint())
2744 return;
2745 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2746 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2747 llvm::Value *Args[] = {
2748 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2749 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2750 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2751 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2752 Args);
2753}
2754
2755void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2756 SourceLocation Loc, llvm::AtomicOrdering AO) {
2757 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2758 OMPBuilder.createFlush(CGF.Builder);
2759 } else {
2760 if (!CGF.HaveInsertPoint())
2761 return;
2762 // Build call void __kmpc_flush(ident_t *loc)
2763 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2764 CGM.getModule(), OMPRTL___kmpc_flush),
2765 emitUpdateLocation(CGF, Loc));
2766 }
2767}
2768
2769namespace {
2770/// Indexes of fields for type kmp_task_t.
2771enum KmpTaskTFields {
2772 /// List of shared variables.
2773 KmpTaskTShareds,
2774 /// Task routine.
2775 KmpTaskTRoutine,
2776 /// Partition id for the untied tasks.
2777 KmpTaskTPartId,
2778 /// Function with call of destructors for private variables.
2779 Data1,
2780 /// Task priority.
2781 Data2,
2782 /// (Taskloops only) Lower bound.
2783 KmpTaskTLowerBound,
2784 /// (Taskloops only) Upper bound.
2785 KmpTaskTUpperBound,
2786 /// (Taskloops only) Stride.
2787 KmpTaskTStride,
2788 /// (Taskloops only) Is last iteration flag.
2789 KmpTaskTLastIter,
2790 /// (Taskloops only) Reduction data.
2791 KmpTaskTReductions,
2792};
2793} // anonymous namespace
2794
2795void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2796 // If we are in simd mode or there are no entries, we don't need to do
2797 // anything.
2798 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2799 return;
2800
2801 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2802 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2803 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2804 SourceLocation Loc;
2805 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2806 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2807 E = CGM.getContext().getSourceManager().fileinfo_end();
2808 I != E; ++I) {
2809 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2810 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2811 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2812 I->getFirst(), EntryInfo.Line, 1);
2813 break;
2814 }
2815 }
2816 }
2817 switch (Kind) {
2818 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2819 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2820 DiagnosticsEngine::Error, "Offloading entry for target region in "
2821 "%0 is incorrect: either the "
2822 "address or the ID is invalid.");
2823 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2824 } break;
2825 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2826 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2827 DiagnosticsEngine::Error, "Offloading entry for declare target "
2828 "variable %0 is incorrect: the "
2829 "address is invalid.");
2830 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2831 } break;
2832 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2833 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2834 DiagnosticsEngine::Error,
2835 "Offloading entry for declare target variable is incorrect: the "
2836 "address is invalid.");
2837 CGM.getDiags().Report(DiagID);
2838 } break;
2839 }
2840 };
2841
2842 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2843}
2844
2845void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2846 if (!KmpRoutineEntryPtrTy) {
2847 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2848 ASTContext &C = CGM.getContext();
2849 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2850 FunctionProtoType::ExtProtoInfo EPI;
2851 KmpRoutineEntryPtrQTy = C.getPointerType(
2852 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2853 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2854 }
2855}
2856
2857namespace {
2858struct PrivateHelpersTy {
2859 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2860 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2861 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2862 PrivateElemInit(PrivateElemInit) {}
2863 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2864 const Expr *OriginalRef = nullptr;
2865 const VarDecl *Original = nullptr;
2866 const VarDecl *PrivateCopy = nullptr;
2867 const VarDecl *PrivateElemInit = nullptr;
2868 bool isLocalPrivate() const {
2869 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2870 }
2871};
2872typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2873} // anonymous namespace
2874
2875static bool isAllocatableDecl(const VarDecl *VD) {
2876 const VarDecl *CVD = VD->getCanonicalDecl();
2877 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2878 return false;
2879 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2880 // Use the default allocation.
2881 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2882 !AA->getAllocator());
2883}
2884
2885static RecordDecl *
2886createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2887 if (!Privates.empty()) {
2888 ASTContext &C = CGM.getContext();
2889 // Build struct .kmp_privates_t. {
2890 // /* private vars */
2891 // };
2892 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2893 RD->startDefinition();
2894 for (const auto &Pair : Privates) {
2895 const VarDecl *VD = Pair.second.Original;
2896 QualType Type = VD->getType().getNonReferenceType();
2897 // If the private variable is a local variable with lvalue ref type,
2898 // allocate the pointer instead of the pointee type.
2899 if (Pair.second.isLocalPrivate()) {
2900 if (VD->getType()->isLValueReferenceType())
2901 Type = C.getPointerType(Type);
2902 if (isAllocatableDecl(VD))
2903 Type = C.getPointerType(Type);
2904 }
2905 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2906 if (VD->hasAttrs()) {
2907 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2908 E(VD->getAttrs().end());
2909 I != E; ++I)
2910 FD->addAttr(*I);
2911 }
2912 }
2913 RD->completeDefinition();
2914 return RD;
2915 }
2916 return nullptr;
2917}
2918
2919static RecordDecl *
2920createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2921 QualType KmpInt32Ty,
2922 QualType KmpRoutineEntryPointerQTy) {
2923 ASTContext &C = CGM.getContext();
2924 // Build struct kmp_task_t {
2925 // void * shareds;
2926 // kmp_routine_entry_t routine;
2927 // kmp_int32 part_id;
2928 // kmp_cmplrdata_t data1;
2929 // kmp_cmplrdata_t data2;
2930 // For taskloops additional fields:
2931 // kmp_uint64 lb;
2932 // kmp_uint64 ub;
2933 // kmp_int64 st;
2934 // kmp_int32 liter;
2935 // void * reductions;
2936 // };
2937 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2938 UD->startDefinition();
2939 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2940 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2941 UD->completeDefinition();
2942 QualType KmpCmplrdataTy = C.getRecordType(UD);
2943 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2944 RD->startDefinition();
2945 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2946 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2947 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2948 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2949 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2950 if (isOpenMPTaskLoopDirective(Kind)) {
2951 QualType KmpUInt64Ty =
2952 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2953 QualType KmpInt64Ty =
2954 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2955 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2956 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2957 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2958 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2959 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2960 }
2961 RD->completeDefinition();
2962 return RD;
2963}
2964
2965static RecordDecl *
2966createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2967 ArrayRef<PrivateDataTy> Privates) {
2968 ASTContext &C = CGM.getContext();
2969 // Build struct kmp_task_t_with_privates {
2970 // kmp_task_t task_data;
2971 // .kmp_privates_t. privates;
2972 // };
2973 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2974 RD->startDefinition();
2975 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2976 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2977 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2978 RD->completeDefinition();
2979 return RD;
2980}
2981
2982/// Emit a proxy function which accepts kmp_task_t as the second
2983/// argument.
2984/// \code
2985/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2986/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2987/// For taskloops:
2988/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2989/// tt->reductions, tt->shareds);
2990/// return 0;
2991/// }
2992/// \endcode
2993static llvm::Function *
2995 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2996 QualType KmpTaskTWithPrivatesPtrQTy,
2997 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2998 QualType SharedsPtrTy, llvm::Function *TaskFunction,
2999 llvm::Value *TaskPrivatesMap) {
3000 ASTContext &C = CGM.getContext();
3001 FunctionArgList Args;
3002 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3003 ImplicitParamKind::Other);
3004 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3005 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3006 ImplicitParamKind::Other);
3007 Args.push_back(&GtidArg);
3008 Args.push_back(&TaskTypeArg);
3009 const auto &TaskEntryFnInfo =
3010 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3011 llvm::FunctionType *TaskEntryTy =
3012 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3013 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3014 auto *TaskEntry = llvm::Function::Create(
3015 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3016 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3017 TaskEntry->setDoesNotRecurse();
3018 CodeGenFunction CGF(CGM);
3019 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3020 Loc, Loc);
3021
3022 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3023 // tt,
3024 // For taskloops:
3025 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3026 // tt->task_data.shareds);
3027 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3028 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3029 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3030 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3031 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3032 const auto *KmpTaskTWithPrivatesQTyRD =
3033 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3034 LValue Base =
3035 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3036 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3037 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3038 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3039 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3040
3041 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3042 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3043 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3044 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3045 CGF.ConvertTypeForMem(SharedsPtrTy));
3046
3047 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3048 llvm::Value *PrivatesParam;
3049 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3050 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3051 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3052 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3053 } else {
3054 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3055 }
3056
3057 llvm::Value *CommonArgs[] = {
3058 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3059 CGF.Builder
3060 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3061 CGF.VoidPtrTy, CGF.Int8Ty)
3062 .emitRawPointer(CGF)};
3063 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3064 std::end(CommonArgs));
3065 if (isOpenMPTaskLoopDirective(Kind)) {
3066 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3067 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3068 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3069 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3070 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3071 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3072 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3073 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3074 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3075 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3076 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3077 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3078 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3079 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3080 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3081 CallArgs.push_back(LBParam);
3082 CallArgs.push_back(UBParam);
3083 CallArgs.push_back(StParam);
3084 CallArgs.push_back(LIParam);
3085 CallArgs.push_back(RParam);
3086 }
3087 CallArgs.push_back(SharedsParam);
3088
3089 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3090 CallArgs);
3091 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3092 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3093 CGF.FinishFunction();
3094 return TaskEntry;
3095}
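// For orientation, the proxy built above corresponds roughly to this
// pseudo-C sketch (names follow the comments above; this is not the
// verbatim emitted IR):
//
//   kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
//     TaskFunction(gtid, tt->task_data.part_id, &tt->privates,
//                  task_privates_map, tt,
//                  /* taskloop only: lb, ub, st, liter, reductions, */
//                  tt->task_data.shareds);
//     return 0;
//   }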
3096
3097static llvm::Value *
3098emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc,
3099 QualType KmpInt32Ty,
3100 QualType KmpTaskTWithPrivatesPtrQTy,
3101 QualType KmpTaskTWithPrivatesQTy) {
3102 ASTContext &C = CGM.getContext();
3103 FunctionArgList Args;
3104 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3105 ImplicitParamKind::Other);
3106 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3107 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3108 ImplicitParamKind::Other);
3109 Args.push_back(&GtidArg);
3110 Args.push_back(&TaskTypeArg);
3111 const auto &DestructorFnInfo =
3112 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3113 llvm::FunctionType *DestructorFnTy =
3114 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3115 std::string Name =
3116 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3117 auto *DestructorFn =
3118 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3119 Name, &CGM.getModule());
3120 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3121 DestructorFnInfo);
3122 DestructorFn->setDoesNotRecurse();
3123 CodeGenFunction CGF(CGM);
3124 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3125 Args, Loc, Loc);
3126
3127 LValue Base = CGF.EmitLoadOfPointerLValue(
3128 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3129 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3130 const auto *KmpTaskTWithPrivatesQTyRD =
3131 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3132 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3133 Base = CGF.EmitLValueForField(Base, *FI);
3134 for (const auto *Field :
3135 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3136 if (QualType::DestructionKind DtorKind =
3137 Field->getType().isDestructedType()) {
3138 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3139 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3140 }
3141 }
3142 CGF.FinishFunction();
3143 return DestructorFn;
3144}
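// Schematically, the generated destructor is (a sketch, not verbatim IR):
//
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                   kmp_task_t_with_privates *tt) {
//     // run the destructor of every field of tt->privates whose type
//     // reports a non-trivial destruction kind
//   }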
3145
3146/// Emit a privates mapping function for correct handling of private and
3147/// firstprivate variables.
3148/// \code
3149/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3150/// **noalias priv1,..., <tyn> **noalias privn) {
3151/// *priv1 = &.privates.priv1;
3152/// ...;
3153/// *privn = &.privates.privn;
3154/// }
3155/// \endcode
3156static llvm::Value *
3157emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3158 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3159 ArrayRef<PrivateDataTy> Privates) {
3160 ASTContext &C = CGM.getContext();
3161 FunctionArgList Args;
3162 ImplicitParamDecl TaskPrivatesArg(
3163 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3164 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3165 ImplicitParamKind::Other);
3166 Args.push_back(&TaskPrivatesArg);
3167 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3168 unsigned Counter = 1;
3169 for (const Expr *E : Data.PrivateVars) {
3170 Args.push_back(ImplicitParamDecl::Create(
3171 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3172 C.getPointerType(C.getPointerType(E->getType()))
3173 .withConst()
3174 .withRestrict(),
3175 ImplicitParamKind::Other));
3176 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3177 PrivateVarsPos[VD] = Counter;
3178 ++Counter;
3179 }
3180 for (const Expr *E : Data.FirstprivateVars) {
3181 Args.push_back(ImplicitParamDecl::Create(
3182 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3183 C.getPointerType(C.getPointerType(E->getType()))
3184 .withConst()
3185 .withRestrict(),
3186 ImplicitParamKind::Other));
3187 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3188 PrivateVarsPos[VD] = Counter;
3189 ++Counter;
3190 }
3191 for (const Expr *E : Data.LastprivateVars) {
3192 Args.push_back(ImplicitParamDecl::Create(
3193 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3194 C.getPointerType(C.getPointerType(E->getType()))
3195 .withConst()
3196 .withRestrict(),
3197 ImplicitParamKind::Other));
3198 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3199 PrivateVarsPos[VD] = Counter;
3200 ++Counter;
3201 }
3202 for (const VarDecl *VD : Data.PrivateLocals) {
3203 QualType Ty = VD->getType().getNonReferenceType();
3204 if (VD->getType()->isLValueReferenceType())
3205 Ty = C.getPointerType(Ty);
3206 if (isAllocatableDecl(VD))
3207 Ty = C.getPointerType(Ty);
3208 Args.push_back(ImplicitParamDecl::Create(
3209 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3210 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3211 ImplicitParamKind::Other));
3212 PrivateVarsPos[VD] = Counter;
3213 ++Counter;
3214 }
3215 const auto &TaskPrivatesMapFnInfo =
3216 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3217 llvm::FunctionType *TaskPrivatesMapTy =
3218 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3219 std::string Name =
3220 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3221 auto *TaskPrivatesMap = llvm::Function::Create(
3222 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3223 &CGM.getModule());
3224 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3225 TaskPrivatesMapFnInfo);
3226 if (CGM.getLangOpts().Optimize) {
3227 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3228 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3229 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3230 }
3231 CodeGenFunction CGF(CGM);
3232 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3233 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3234
3235 // *privi = &.privates.privi;
3236 LValue Base = CGF.EmitLoadOfPointerLValue(
3237 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3238 TaskPrivatesArg.getType()->castAs<PointerType>());
3239 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3240 Counter = 0;
3241 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3242 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3243 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3244 LValue RefLVal =
3245 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3246 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3247 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3248 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3249 ++Counter;
3250 }
3251 CGF.FinishFunction();
3252 return TaskPrivatesMap;
3253}
3254
3255/// Emit initialization for private variables in task-based directives.
3256static void emitPrivatesInit(CodeGenFunction &CGF,
3257 const OMPExecutableDirective &D,
3258 Address KmpTaskSharedsPtr, LValue TDBase,
3259 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3260 QualType SharedsTy, QualType SharedsPtrTy,
3261 const OMPTaskDataTy &Data,
3262 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3263 ASTContext &C = CGF.getContext();
3264 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3265 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3266 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3267 ? OMPD_taskloop
3268 : OMPD_task;
3269 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3270 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3271 LValue SrcBase;
3272 bool IsTargetTask =
3273 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3274 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3275 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3276 // PointersArray, SizesArray, and MappersArray. The original variables for
3277 // these arrays are not captured and we get their addresses explicitly.
3278 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3279 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3280 SrcBase = CGF.MakeAddrLValue(
3281 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3282 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3283 CGF.ConvertTypeForMem(SharedsTy)),
3284 SharedsTy);
3285 }
3286 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3287 for (const PrivateDataTy &Pair : Privates) {
3288 // Do not initialize private locals.
3289 if (Pair.second.isLocalPrivate()) {
3290 ++FI;
3291 continue;
3292 }
3293 const VarDecl *VD = Pair.second.PrivateCopy;
3294 const Expr *Init = VD->getAnyInitializer();
3295 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3296 !CGF.isTrivialInitializer(Init)))) {
3297 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3298 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3299 const VarDecl *OriginalVD = Pair.second.Original;
3300 // Check if the variable is the target-based BasePointersArray,
3301 // PointersArray, SizesArray, or MappersArray.
3302 LValue SharedRefLValue;
3303 QualType Type = PrivateLValue.getType();
3304 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3305 if (IsTargetTask && !SharedField) {
3306 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3307 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3308 cast<CapturedDecl>(OriginalVD->getDeclContext())
3309 ->getNumParams() == 0 &&
3310 isa<TranslationUnitDecl>(
3311 cast<CapturedDecl>(OriginalVD->getDeclContext())
3312 ->getDeclContext()) &&
3313 "Expected artificial target data variable.");
3314 SharedRefLValue =
3315 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3316 } else if (ForDup) {
3317 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3318 SharedRefLValue = CGF.MakeAddrLValue(
3319 SharedRefLValue.getAddress().withAlignment(
3320 C.getDeclAlign(OriginalVD)),
3321 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3322 SharedRefLValue.getTBAAInfo());
3323 } else if (CGF.LambdaCaptureFields.count(
3324 Pair.second.Original->getCanonicalDecl()) > 0 ||
3325 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3326 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3327 } else {
3328 // Processing for implicitly captured variables.
3329 InlinedOpenMPRegionRAII Region(
3330 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3331 /*HasCancel=*/false, /*NoInheritance=*/true);
3332 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3333 }
3334 if (Type->isArrayType()) {
3335 // Initialize firstprivate array.
3336 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3337 // Perform simple memcpy.
3338 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3339 } else {
3340 // Initialize firstprivate array using element-by-element
3341 // initialization.
3342 CGF.EmitOMPAggregateAssign(
3343 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3344 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3345 Address SrcElement) {
3346 // Clean up any temporaries needed by the initialization.
3347 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3348 InitScope.addPrivate(Elem, SrcElement);
3349 (void)InitScope.Privatize();
3350 // Emit initialization for single element.
3351 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3352 CGF, &CapturesInfo);
3353 CGF.EmitAnyExprToMem(Init, DestElement,
3354 Init->getType().getQualifiers(),
3355 /*IsInitializer=*/false);
3356 });
3357 }
3358 } else {
3359 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3360 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3361 (void)InitScope.Privatize();
3362 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3363 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3364 /*capturedByInit=*/false);
3365 }
3366 } else {
3367 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3368 }
3369 }
3370 ++FI;
3371 }
3372}
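// Illustrative example (not part of the source): given
//   struct S { S(const S &); ~S(); };
//   int a[8]; S s;
//   #pragma omp task firstprivate(a, s)
// the private copy of 'a' takes the trivial-array path above (one aggregate
// memcpy from the captured shareds), while 's' is initialized through its
// copy constructor via EmitExprAsInit.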
3373
3374/// Check if duplication function is required for taskloops.
3375static bool checkInitIsRequired(CodeGenFunction &CGF,
3376 ArrayRef<PrivateDataTy> Privates) {
3377 bool InitRequired = false;
3378 for (const PrivateDataTy &Pair : Privates) {
3379 if (Pair.second.isLocalPrivate())
3380 continue;
3381 const VarDecl *VD = Pair.second.PrivateCopy;
3382 const Expr *Init = VD->getAnyInitializer();
3383 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3384 !CGF.isTrivialInitializer(Init));
3385 if (InitRequired)
3386 break;
3387 }
3388 return InitRequired;
3389}
3390
3391
3392/// Emit task_dup function (for initialization of
3393/// private/firstprivate/lastprivate vars and last_iter flag)
3394/// \code
3395/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3396/// lastpriv) {
3397/// // setup lastprivate flag
3398/// task_dst->last = lastpriv;
3399/// // could be constructor calls here...
3400/// }
3401/// \endcode
3402static llvm::Value *
3403emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3404 const OMPExecutableDirective &D,
3405 QualType KmpTaskTWithPrivatesPtrQTy,
3406 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3407 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3408 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3409 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3410 ASTContext &C = CGM.getContext();
3411 FunctionArgList Args;
3412 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3413 KmpTaskTWithPrivatesPtrQTy,
3414 ImplicitParamKind::Other);
3415 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3416 KmpTaskTWithPrivatesPtrQTy,
3417 ImplicitParamKind::Other);
3418 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3419 ImplicitParamKind::Other);
3420 Args.push_back(&DstArg);
3421 Args.push_back(&SrcArg);
3422 Args.push_back(&LastprivArg);
3423 const auto &TaskDupFnInfo =
3424 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3425 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3426 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3427 auto *TaskDup = llvm::Function::Create(
3428 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3429 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3430 TaskDup->setDoesNotRecurse();
3431 CodeGenFunction CGF(CGM);
3432 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3433 Loc);
3434
3435 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3436 CGF.GetAddrOfLocalVar(&DstArg),
3437 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3438 // task_dst->liter = lastpriv;
3439 if (WithLastIter) {
3440 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3441 LValue Base = CGF.EmitLValueForField(
3442 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3443 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3444 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3445 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3446 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3447 }
3448
3449 // Emit initial values for private copies (if any).
3450 assert(!Privates.empty());
3451 Address KmpTaskSharedsPtr = Address::invalid();
3452 if (!Data.FirstprivateVars.empty()) {
3453 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3454 CGF.GetAddrOfLocalVar(&SrcArg),
3455 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3456 LValue Base = CGF.EmitLValueForField(
3457 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3458 KmpTaskSharedsPtr = Address(
3459 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3460 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3461 KmpTaskTShareds)),
3462 Loc),
3463 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3464 }
3465 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3466 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3467 CGF.FinishFunction();
3468 return TaskDup;
3469}
3470
3471/// Checks if destructor function is required to be generated.
3472/// \return true if cleanups are required, false otherwise.
3473static bool
3474checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3475 ArrayRef<PrivateDataTy> Privates) {
3476 for (const PrivateDataTy &P : Privates) {
3477 if (P.second.isLocalPrivate())
3478 continue;
3479 QualType Ty = P.second.Original->getType().getNonReferenceType();
3480 if (Ty.isDestructedType())
3481 return true;
3482 }
3483 return false;
3484}
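// For example (illustrative): a privatized std::string has a destructed
// type, so this returns true and emitTaskInit() below sets the
// DestructorsFlag and emits a destructor thunk; a task whose privates are
// all trivially destructible skips both.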
3485
3486namespace {
3487/// Loop generator for OpenMP iterator expression.
3488class OMPIteratorGeneratorScope final
3489 : public CodeGenFunction::OMPPrivateScope {
3490 CodeGenFunction &CGF;
3491 const OMPIteratorExpr *E = nullptr;
3492 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3493 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3494 OMPIteratorGeneratorScope() = delete;
3495 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3496
3497public:
3498 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3499 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3500 if (!E)
3501 return;
3502 SmallVector<llvm::Value *, 4> Uppers;
3503 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3504 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3505 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3506 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3507 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3508 addPrivate(
3509 HelperData.CounterVD,
3510 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3511 }
3512 Privatize();
3513
3514 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3515 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3516 LValue CLVal =
3517 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3518 HelperData.CounterVD->getType());
3519 // Counter = 0;
3520 CGF.EmitStoreOfScalar(
3521 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3522 CLVal);
3523 CodeGenFunction::JumpDest &ContDest =
3524 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3525 CodeGenFunction::JumpDest &ExitDest =
3526 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3527 // N = <number-of-iterations>;
3528 llvm::Value *N = Uppers[I];
3529 // cont:
3530 // if (Counter < N) goto body; else goto exit;
3531 CGF.EmitBlock(ContDest.getBlock());
3532 auto *CVal =
3533 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3534 llvm::Value *Cmp =
3535 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3536 ? CGF.Builder.CreateICmpSLT(CVal, N)
3537 : CGF.Builder.CreateICmpULT(CVal, N);
3538 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3539 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3540 // body:
3541 CGF.EmitBlock(BodyBB);
3542 // Iteri = Begini + Counter * Stepi;
3543 CGF.EmitIgnoredExpr(HelperData.Update);
3544 }
3545 }
3546 ~OMPIteratorGeneratorScope() {
3547 if (!E)
3548 return;
3549 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3550 // Counter = Counter + 1;
3551 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3552 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3553 // goto cont;
3554 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3555 // exit:
3556 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3557 }
3558 }
3559};
3560} // namespace
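// Taken together, for a clause modifier 'iterator(i = 0:n)' the scope
// brackets whatever is emitted between its constructor and destructor
// roughly like this (sketch):
//
//   counter = 0;
// cont:
//   if (counter < n) goto body; else goto exit;
// body:
//   i = begin + counter * step;
//   <code emitted while the scope is alive>
//   counter = counter + 1;
//   goto cont;
// exit: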
3561
3562static std::pair<llvm::Value *, llvm::Value *>
3563getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3564 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3565 llvm::Value *Addr;
3566 if (OASE) {
3567 const Expr *Base = OASE->getBase();
3568 Addr = CGF.EmitScalarExpr(Base);
3569 } else {
3570 Addr = CGF.EmitLValue(E).getPointer(CGF);
3571 }
3572 llvm::Value *SizeVal;
3573 QualType Ty = E->getType();
3574 if (OASE) {
3575 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3576 for (const Expr *SE : OASE->getDimensions()) {
3577 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3578 Sz = CGF.EmitScalarConversion(
3579 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3580 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3581 }
3582 } else if (const auto *ASE =
3583 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3584 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3585 Address UpAddrAddress = UpAddrLVal.getAddress();
3586 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3587 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3588 /*Idx0=*/1);
3589 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3590 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3591 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3592 } else {
3593 SizeVal = CGF.getTypeSize(Ty);
3594 }
3595 return std::make_pair(Addr, SizeVal);
3596}
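// Illustrative results for the three cases above (sizes in bytes):
//   shaping expr ([n][m])p -> addr = p,      size = n * m * sizeof(*p)
//   section a[lo:len]      -> addr = &a[lo], size = (char *)(&a[lo+len-1] + 1)
//                                                   - (char *)&a[lo]
//   any other lvalue x     -> addr = &x,     size = sizeof(x)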
3597
3598/// Builds the kmp_task_affinity_info_t record, if it is not built yet, and builds flags type.
3599static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3600 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3601 if (KmpTaskAffinityInfoTy.isNull()) {
3602 RecordDecl *KmpAffinityInfoRD =
3603 C.buildImplicitRecord("kmp_task_affinity_info_t");
3604 KmpAffinityInfoRD->startDefinition();
3605 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3606 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3607 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3608 KmpAffinityInfoRD->completeDefinition();
3609 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3610 }
3611}
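// The implicit record built here mirrors the runtime's
// kmp_task_affinity_info_t, roughly:
//
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t len;
//     uint32_t flags;
//   };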
3612
3613CGOpenMPRuntime::TaskResultTy
3614CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3615 const OMPExecutableDirective &D,
3616 llvm::Function *TaskFunction, QualType SharedsTy,
3617 Address Shareds, const OMPTaskDataTy &Data) {
3618 ASTContext &C = CGM.getContext();
3619 llvm::SmallVector<PrivateDataTy, 4> Privates;
3620 // Aggregate privates and sort them by the alignment.
3621 const auto *I = Data.PrivateCopies.begin();
3622 for (const Expr *E : Data.PrivateVars) {
3623 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3624 Privates.emplace_back(
3625 C.getDeclAlign(VD),
3626 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3627 /*PrivateElemInit=*/nullptr));
3628 ++I;
3629 }
3630 I = Data.FirstprivateCopies.begin();
3631 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3632 for (const Expr *E : Data.FirstprivateVars) {
3633 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3634 Privates.emplace_back(
3635 C.getDeclAlign(VD),
3636 PrivateHelpersTy(
3637 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3638 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3639 ++I;
3640 ++IElemInitRef;
3641 }
3642 I = Data.LastprivateCopies.begin();
3643 for (const Expr *E : Data.LastprivateVars) {
3644 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3645 Privates.emplace_back(
3646 C.getDeclAlign(VD),
3647 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3648 /*PrivateElemInit=*/nullptr));
3649 ++I;
3650 }
3651 for (const VarDecl *VD : Data.PrivateLocals) {
3652 if (isAllocatableDecl(VD))
3653 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3654 else
3655 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3656 }
3657 llvm::stable_sort(Privates,
3658 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3659 return L.first > R.first;
3660 });
3661 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3662 // Build type kmp_routine_entry_t (if not built yet).
3663 emitKmpRoutineEntryT(KmpInt32Ty);
3664 // Build type kmp_task_t (if not built yet).
3665 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3666 if (SavedKmpTaskloopTQTy.isNull()) {
3667 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3668 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3669 }
3670 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3671 } else {
3672 assert((D.getDirectiveKind() == OMPD_task ||
3673 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3674 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3675 "Expected taskloop, task or target directive");
3676 if (SavedKmpTaskTQTy.isNull()) {
3677 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3678 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3679 }
3680 KmpTaskTQTy = SavedKmpTaskTQTy;
3681 }
3682 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3683 // Build particular struct kmp_task_t for the given task.
3684 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3685 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3686 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3687 QualType KmpTaskTWithPrivatesPtrQTy =
3688 C.getPointerType(KmpTaskTWithPrivatesQTy);
3689 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3690 llvm::Type *KmpTaskTWithPrivatesPtrTy =
3691 KmpTaskTWithPrivatesTy->getPointerTo();
3692 llvm::Value *KmpTaskTWithPrivatesTySize =
3693 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3694 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3695
3696 // Emit initial values for private copies (if any).
3697 llvm::Value *TaskPrivatesMap = nullptr;
3698 llvm::Type *TaskPrivatesMapTy =
3699 std::next(TaskFunction->arg_begin(), 3)->getType();
3700 if (!Privates.empty()) {
3701 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3702 TaskPrivatesMap =
3703 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3704 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3705 TaskPrivatesMap, TaskPrivatesMapTy);
3706 } else {
3707 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3708 cast<llvm::PointerType>(TaskPrivatesMapTy));
3709 }
3710 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3711 // kmp_task_t *tt);
3712 llvm::Function *TaskEntry = emitProxyTaskFunction(
3713 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3714 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3715 TaskPrivatesMap);
3716
3717 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3718 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3719 // kmp_routine_entry_t *task_entry);
3720 // Task flags. Format is taken from
3721 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3722 // description of kmp_tasking_flags struct.
3723 enum {
3724 TiedFlag = 0x1,
3725 FinalFlag = 0x2,
3726 DestructorsFlag = 0x8,
3727 PriorityFlag = 0x20,
3728 DetachableFlag = 0x40,
3729 };
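 // For instance (illustrative), a tied task whose privates need cleanup is
 // allocated with Flags == TiedFlag | DestructorsFlag (0x1 | 0x8 == 0x9);
 // the FinalFlag is folded in separately below via the select on Data.Final.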
3730 unsigned Flags = Data.Tied ? TiedFlag : 0;
3731 bool NeedsCleanup = false;
3732 if (!Privates.empty()) {
3733 NeedsCleanup =
3734 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3735 if (NeedsCleanup)
3736 Flags = Flags | DestructorsFlag;
3737 }
3738 if (Data.Priority.getInt())
3739 Flags = Flags | PriorityFlag;
3740 if (D.hasClausesOfKind<OMPDetachClause>())
3741 Flags = Flags | DetachableFlag;
3742 llvm::Value *TaskFlags =
3743 Data.Final.getPointer()
3744 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3745 CGF.Builder.getInt32(FinalFlag),
3746 CGF.Builder.getInt32(/*C=*/0))
3747 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3748 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3749 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3750 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3751 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3752 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3753 TaskEntry, KmpRoutineEntryPtrTy)};
3754 llvm::Value *NewTask;
3755 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3756 // Check if we have any device clause associated with the directive.
3757 const Expr *Device = nullptr;
3758 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3759 Device = C->getDevice();
3760 // Emit device ID if any otherwise use default value.
3761 llvm::Value *DeviceID;
3762 if (Device)
3763 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3764 CGF.Int64Ty, /*isSigned=*/true);
3765 else
3766 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3767 AllocArgs.push_back(DeviceID);
3768 NewTask = CGF.EmitRuntimeCall(
3769 OMPBuilder.getOrCreateRuntimeFunction(
3770 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3771 AllocArgs);
3772 } else {
3773 NewTask =
3774 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3775 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3776 AllocArgs);
3777 }
3778 // Emit detach clause initialization.
3779 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3780 // task_descriptor);
3781 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3782 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3783 LValue EvtLVal = CGF.EmitLValue(Evt);
3784
3785 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3786 // int gtid, kmp_task_t *task);
3787 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3788 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3789 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3790 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3791 OMPBuilder.getOrCreateRuntimeFunction(
3792 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3793 {Loc, Tid, NewTask});
3794 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3795 Evt->getExprLoc());
3796 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3797 }
3798 // Process affinity clauses.
3799 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3800 // Process list of affinity data.
3801 ASTContext &C = CGM.getContext();
3802 Address AffinitiesArray = Address::invalid();
3803 // Calculate number of elements to form the array of affinity data.
3804 llvm::Value *NumOfElements = nullptr;
3805 unsigned NumAffinities = 0;
3806 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3807 if (const Expr *Modifier = C->getModifier()) {
3808 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3809 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3810 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3811 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3812 NumOfElements =
3813 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3814 }
3815 } else {
3816 NumAffinities += C->varlist_size();
3817 }
3818 }
3819 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3820 // Field ids in the kmp_task_affinity_info record.
3821 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3822
3823 QualType KmpTaskAffinityInfoArrayTy;
3824 if (NumOfElements) {
3825 NumOfElements = CGF.Builder.CreateNUWAdd(
3826 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3827 auto *OVE = new (C) OpaqueValueExpr(
3828 Loc,
3829 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3830 VK_PRValue);
3831 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3832 RValue::get(NumOfElements));
3833 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3834 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3835 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3836 // Properly emit variable-sized array.
3837 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3838 ImplicitParamKind::Other);
3839 CGF.EmitVarDecl(*PD);
3840 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3841 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3842 /*isSigned=*/false);
3843 } else {
3844 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3845 KmpTaskAffinityInfoTy,
3846 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3847 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3848 AffinitiesArray =
3849 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3850 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3851 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3852 /*isSigned=*/false);
3853 }
3854
3855 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3856 // Fill array by elements without iterators.
3857 unsigned Pos = 0;
3858 bool HasIterator = false;
3859 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3860 if (C->getModifier()) {
3861 HasIterator = true;
3862 continue;
3863 }
3864 for (const Expr *E : C->varlists()) {
3865 llvm::Value *Addr;
3866 llvm::Value *Size;
3867 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3868 LValue Base =
3869 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3870 KmpTaskAffinityInfoTy);
3871 // affs[i].base_addr = &<Affinities[i].second>;
3872 LValue BaseAddrLVal = CGF.EmitLValueForField(
3873 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3874 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3875 BaseAddrLVal);
3876 // affs[i].len = sizeof(<Affinities[i].second>);
3877 LValue LenLVal = CGF.EmitLValueForField(
3878 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3879 CGF.EmitStoreOfScalar(Size, LenLVal);
3880 ++Pos;
3881 }
3882 }
3883 LValue PosLVal;
3884 if (HasIterator) {
3885 PosLVal = CGF.MakeAddrLValue(
3886 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3887 C.getSizeType());
3888 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3889 }
3890 // Process elements with iterators.
3891 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3892 const Expr *Modifier = C->getModifier();
3893 if (!Modifier)
3894 continue;
3895 OMPIteratorGeneratorScope IteratorScope(
3896 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3897 for (const Expr *E : C->varlists()) {
3898 llvm::Value *Addr;
3899 llvm::Value *Size;
3900 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3901 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3902 LValue Base =
3903 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3904 KmpTaskAffinityInfoTy);
3905 // affs[i].base_addr = &<Affinities[i].second>;
3906 LValue BaseAddrLVal = CGF.EmitLValueForField(
3907 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3908 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3909 BaseAddrLVal);
3910 // affs[i].len = sizeof(<Affinities[i].second>);
3911 LValue LenLVal = CGF.EmitLValueForField(
3912 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3913 CGF.EmitStoreOfScalar(Size, LenLVal);
3914 Idx = CGF.Builder.CreateNUWAdd(
3915 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3916 CGF.EmitStoreOfScalar(Idx, PosLVal);
3917 }
3918 }
3919 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3920 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3921 // naffins, kmp_task_affinity_info_t *affin_list);
3922 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3923 llvm::Value *GTid = getThreadID(CGF, Loc);
3924 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3925 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3926 // FIXME: Emit the function and ignore its result for now unless the
3927 // runtime function is properly implemented.
3928 (void)CGF.EmitRuntimeCall(
3929 OMPBuilder.getOrCreateRuntimeFunction(
3930 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3931 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3932 }
3933 llvm::Value *NewTaskNewTaskTTy =
3934 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3935 NewTask, KmpTaskTWithPrivatesPtrTy);
3936 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3937 KmpTaskTWithPrivatesQTy);
3938 LValue TDBase =
3939 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3940 // Fill the data in the resulting kmp_task_t record.
3941 // Copy shareds if there are any.
3942 Address KmpTaskSharedsPtr = Address::invalid();
3943 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3944 KmpTaskSharedsPtr = Address(
3945 CGF.EmitLoadOfScalar(
3946 CGF.EmitLValueForField(
3947 TDBase,
3948 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3949 Loc),
3950 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3951 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3952 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3953 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3954 }
3955 // Emit initial values for private copies (if any).
3956 TaskResultTy Result;
3957 if (!Privates.empty()) {
3958 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3959 SharedsTy, SharedsPtrTy, Data, Privates,
3960 /*ForDup=*/false);
3961 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3962 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3963 Result.TaskDupFn = emitTaskDupFunction(
3964 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3965 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3966 /*WithLastIter=*/!Data.LastprivateVars.empty());
3967 }
3968 }
3969 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3970 enum { Priority = 0, Destructors = 1 };
3971 // Provide pointer to function with destructors for privates.
3972 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3973 const RecordDecl *KmpCmplrdataUD =
3974 (*FI)->getType()->getAsUnionType()->getDecl();
3975 if (NeedsCleanup) {
3976 llvm::Value *DestructorFn = emitDestructorsFunction(
3977 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3978 KmpTaskTWithPrivatesQTy);
3979 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3980 LValue DestructorsLV = CGF.EmitLValueForField(
3981 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3982 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3983 DestructorFn, KmpRoutineEntryPtrTy),
3984 DestructorsLV);
3985 }
3986 // Set priority.
3987 if (Data.Priority.getInt()) {
3988 LValue Data2LV = CGF.EmitLValueForField(
3989 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3990 LValue PriorityLV = CGF.EmitLValueForField(
3991 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3992 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3993 }
3994 Result.NewTask = NewTask;
3995 Result.TaskEntry = TaskEntry;
3996 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3997 Result.TDBase = TDBase;
3998 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3999 return Result;
4000}
4001
4002/// Translates internal dependency kind into the runtime kind.
4003static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4004 RTLDependenceKindTy DepKind;
4005 switch (K) {
4006 case OMPC_DEPEND_in:
4007 DepKind = RTLDependenceKindTy::DepIn;
4008 break;
4009 // Out and InOut dependencies must use the same code.
4010 case OMPC_DEPEND_out:
4011 case OMPC_DEPEND_inout:
4012 DepKind = RTLDependenceKindTy::DepInOut;
4013 break;
4014 case OMPC_DEPEND_mutexinoutset:
4015 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4016 break;
4017 case OMPC_DEPEND_inoutset:
4018 DepKind = RTLDependenceKindTy::DepInOutSet;
4019 break;
4020 case OMPC_DEPEND_outallmemory:
4021 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4022 break;
4023 case OMPC_DEPEND_source:
4024 case OMPC_DEPEND_sink:
4025 case OMPC_DEPEND_depobj:
4026 case OMPC_DEPEND_inoutallmemory:
4027 case OMPC_DEPEND_unknown:
4028 llvm_unreachable("Unknown task dependence type");
4029 }
4030 return DepKind;
4031}
4032
4033/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4034static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4035 QualType &FlagsTy) {
4036 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4037 if (KmpDependInfoTy.isNull()) {
4038 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4039 KmpDependInfoRD->startDefinition();
4040 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4041 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4042 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4043 KmpDependInfoRD->completeDefinition();
4044 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4045 }
4046}
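// The implicit record mirrors the runtime's kmp_depend_info, roughly:
//
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t len;
//     <unsigned integer of bool width, typically uint8_t> flags;
//   };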
4047
4048std::pair<llvm::Value *, LValue>
4049CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4050 SourceLocation Loc) {
4051 ASTContext &C = CGM.getContext();
4052 QualType FlagsTy;
4053 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4054 RecordDecl *KmpDependInfoRD =
4055 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4056 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4057 LValue Base = CGF.EmitLoadOfPointerLValue(
4058 DepobjLVal.getAddress().withElementType(
4059 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4060 KmpDependInfoPtrTy->castAs<PointerType>());
4061 Address DepObjAddr = CGF.Builder.CreateGEP(
4062 CGF, Base.getAddress(),
4063 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4064 LValue NumDepsBase = CGF.MakeAddrLValue(
4065 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4066 // NumDeps = deps[i].base_addr;
4067 LValue BaseAddrLVal = CGF.EmitLValueForField(
4068 NumDepsBase,
4069 *std::next(KmpDependInfoRD->field_begin(),
4070 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4071 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4072 return std::make_pair(NumDeps, Base);
4073}
4074
4075static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4076 llvm::PointerUnion<unsigned *, LValue *> Pos,
4077 const OMPTaskDataTy::DependData &Data,
4078 Address DependenciesArray) {
4079 CodeGenModule &CGM = CGF.CGM;
4080 ASTContext &C = CGM.getContext();
4081 QualType FlagsTy;
4082 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4083 RecordDecl *KmpDependInfoRD =
4084 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4085 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4086
4087 OMPIteratorGeneratorScope IteratorScope(
4088 CGF, cast_or_null<OMPIteratorExpr>(
4089 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4090 : nullptr));
4091 for (const Expr *E : Data.DepExprs) {
4092 llvm::Value *Addr;
4093 llvm::Value *Size;
4094
4095 // The expression will be a nullptr in the 'omp_all_memory' case.
4096 if (E) {
4097 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4098 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4099 } else {
4100 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4101 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4102 }
4103 LValue Base;
4104 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4105 Base = CGF.MakeAddrLValue(
4106 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4107 } else {
4108 assert(E && "Expected a non-null expression");
4109 LValue &PosLVal = *Pos.get<LValue *>();
4110 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4111 Base = CGF.MakeAddrLValue(
4112 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4113 }
4114 // deps[i].base_addr = &<Dependencies[i].second>;
4115 LValue BaseAddrLVal = CGF.EmitLValueForField(
4116 Base,
4117 *std::next(KmpDependInfoRD->field_begin(),
4118 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4119 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4120 // deps[i].len = sizeof(<Dependencies[i].second>);
4121 LValue LenLVal = CGF.EmitLValueForField(
4122 Base, *std::next(KmpDependInfoRD->field_begin(),
4123 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4124 CGF.EmitStoreOfScalar(Size, LenLVal);
4125 // deps[i].flags = <Dependencies[i].first>;
4126 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4127 LValue FlagsLVal = CGF.EmitLValueForField(
4128 Base,
4129 *std::next(KmpDependInfoRD->field_begin(),
4130 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4131 CGF.EmitStoreOfScalar(
4132 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4133 FlagsLVal);
4134 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4135 ++(*P);
4136 } else {
4137 LValue &PosLVal = *Pos.get<LValue *>();
4138 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4139 Idx = CGF.Builder.CreateNUWAdd(Idx,
4140 llvm::ConstantInt::get(Idx->getType(), 1));
4141 CGF.EmitStoreOfScalar(Idx, PosLVal);
4142 }
4143 }
4144}
4145
4146static SmallVector<llvm::Value *, 4> emitDepobjElementsSizes(
4147 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4148 const OMPTaskDataTy::DependData &Data) {
4149 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4150 "Expected depobj dependency kind.");
4152 SmallVector<LValue, 4> SizeLVals;
4153 ASTContext &C = CGF.getContext();
4154 {
4155 OMPIteratorGeneratorScope IteratorScope(
4156 CGF, cast_or_null<OMPIteratorExpr>(
4157 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4158 : nullptr));
4159 for (const Expr *E : Data.DepExprs) {
4160 llvm::Value *NumDeps;
4161 LValue Base;
4162 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4163 std::tie(NumDeps, Base) =
4164 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4165 LValue NumLVal = CGF.MakeAddrLValue(
4166 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4167 C.getUIntPtrType());
4168 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4169 NumLVal.getAddress());
4170 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4171 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4172 CGF.EmitStoreOfScalar(Add, NumLVal);
4173 SizeLVals.push_back(NumLVal);
4174 }
4175 }
4176 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4177 llvm::Value *Size =
4178 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4179 Sizes.push_back(Size);
4180 }
4181 return Sizes;
4182}
4183
4184static void emitDepobjElements(CodeGenFunction &CGF,
4185 QualType &KmpDependInfoTy,
4186 LValue PosLVal,
4187 const OMPTaskDataTy::DependData &Data,
4188 Address DependenciesArray) {
4189 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4190 "Expected depobj dependency kind.");
4191 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4192 {
4193 OMPIteratorGeneratorScope IteratorScope(
4194 CGF, cast_or_null<OMPIteratorExpr>(
4195 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4196 : nullptr));
4197 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4198 const Expr *E = Data.DepExprs[I];
4199 llvm::Value *NumDeps;
4200 LValue Base;
4201 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4202 std::tie(NumDeps, Base) =
4203 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4204
4205 // memcopy dependency data.
4206 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4207 ElSize,
4208 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4209 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4210 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4211 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4212
4213 // Increase pos.
4214 // pos += size;
4215 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4216 CGF.EmitStoreOfScalar(Add, PosLVal);
4217 }
4218 }
4219}
4220
4221std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4222 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4223 SourceLocation Loc) {
4224 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4225 return D.DepExprs.empty();
4226 }))
4227 return std::make_pair(nullptr, Address::invalid());
4228 // Process list of dependencies.
4229 ASTContext &C = CGM.getContext();
4230 Address DependenciesArray = Address::invalid();
4231 llvm::Value *NumOfElements = nullptr;
4232 unsigned NumDependencies = std::accumulate(
4233 Dependencies.begin(), Dependencies.end(), 0,
4234 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4235 return D.DepKind == OMPC_DEPEND_depobj
4236 ? V
4237 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4238 });
4239 QualType FlagsTy;
4240 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4241 bool HasDepobjDeps = false;
4242 bool HasRegularWithIterators = false;
4243 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4244 llvm::Value *NumOfRegularWithIterators =
4245 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4246 // Calculate number of depobj dependencies and regular deps with the
4247 // iterators.
4248 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4249 if (D.DepKind == OMPC_DEPEND_depobj) {
4250 SmallVector<llvm::Value *, 4> Sizes =
4251 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4252 for (llvm::Value *Size : Sizes) {
4253 NumOfDepobjElements =
4254 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4255 }
4256 HasDepobjDeps = true;
4257 continue;
4258 }
4259 // Include number of iterations, if any.
4260
4261 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4262 llvm::Value *ClauseIteratorSpace =
4263 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4264 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4265 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4266 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4267 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4268 }
4269 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4270 ClauseIteratorSpace,
4271 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4272 NumOfRegularWithIterators =
4273 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4274 HasRegularWithIterators = true;
4275 continue;
4276 }
4277 }
4278
4279 QualType KmpDependInfoArrayTy;
4280 if (HasDepobjDeps || HasRegularWithIterators) {
4281 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4282 /*isSigned=*/false);
4283 if (HasDepobjDeps) {
4284 NumOfElements =
4285 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4286 }
4287 if (HasRegularWithIterators) {
4288 NumOfElements =
4289 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4290 }
4291 auto *OVE = new (C) OpaqueValueExpr(
4292 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4293 VK_PRValue);
4294 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4295 RValue::get(NumOfElements));
4296 KmpDependInfoArrayTy =
4297 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4298 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4299 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4300 // Properly emit variable-sized array.
4301 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4302 ImplicitParamKind::Other);
4303 CGF.EmitVarDecl(*PD);
4304 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4305 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4306 /*isSigned=*/false);
4307 } else {
4308 KmpDependInfoArrayTy = C.getConstantArrayType(
4309 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4310 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4311 DependenciesArray =
4312 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4313 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4314 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4315 /*isSigned=*/false);
4316 }
4317 unsigned Pos = 0;
4318 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4319 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4320 Dependencies[I].IteratorExpr)
4321 continue;
4322 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4323 DependenciesArray);
4324 }
4325 // Copy regular dependencies with iterators.
4326 LValue PosLVal = CGF.MakeAddrLValue(
4327 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4328 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4329 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4330 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4331 !Dependencies[I].IteratorExpr)
4332 continue;
4333 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4334 DependenciesArray);
4335 }
4336 // Copy final depobj arrays without iterators.
4337 if (HasDepobjDeps) {
4338 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4339 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4340 continue;
4341 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4342 DependenciesArray);
4343 }
4344 }
4345 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4346 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4347 return std::make_pair(NumOfElements, DependenciesArray);
4348}
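// Counting example (illustrative): for
//   depend(in: x, y) depend(iterator(i = 0:n), out: a[i]) depend(depobj: o)
// the accumulate above yields NumDependencies == 2; the iterator clause
// contributes n entries and the depobj clause contributes the count stored
// in 'o', both known only at run time, so the variable-sized-array path is
// taken.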
4349
4350Address CGOpenMPRuntime::emitDepobjDependClause(
4351 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4352 SourceLocation Loc) {
4353 if (Dependencies.DepExprs.empty())
4354 return Address::invalid();
4355 // Process list of dependencies.
4356 ASTContext &C = CGM.getContext();
4357 Address DependenciesArray = Address::invalid();
4358 unsigned NumDependencies = Dependencies.DepExprs.size();
4359 QualType FlagsTy;
4360 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4361 RecordDecl *KmpDependInfoRD =
4362 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4363
4364 llvm::Value *Size;
4365 // Define type kmp_depend_info[<Dependencies.size()>];
4366 // For depobj reserve one extra element to store the number of elements.
4367 // It is required to handle depobj(x) update(in) construct.
4368 // kmp_depend_info[<Dependencies.size()>] deps;
4369 llvm::Value *NumDepsVal;
4370 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4371 if (const auto *IE =
4372 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4373 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4374 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4375 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4376 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4377 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4378 }
4379 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4380 NumDepsVal);
4381 CharUnits SizeInBytes =
4382 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4383 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4384 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4385 NumDepsVal =
4386 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4387 } else {
4388 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4389 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4390 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4391 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4392 Size = CGM.getSize(Sz.alignTo(Align));
4393 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4394 }
4395 // Need to allocate on the dynamic memory.
4396 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4397 // Use default allocator.
4398 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4399 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4400
4401 llvm::Value *Addr =
4402 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4403 CGM.getModule(), OMPRTL___kmpc_alloc),
4404 Args, ".dep.arr.addr");
4405 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4406 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4407 Addr, KmpDependInfoLlvmTy->getPointerTo());
4408 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4409 // Write number of elements in the first element of array for depobj.
4410 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4411 // deps[i].base_addr = NumDependencies;
4412 LValue BaseAddrLVal = CGF.EmitLValueForField(
4413 Base,
4414 *std::next(KmpDependInfoRD->field_begin(),
4415 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4416 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4417 llvm::PointerUnion<unsigned *, LValue *> Pos;
4418 unsigned Idx = 1;
4419 LValue PosLVal;
4420 if (Dependencies.IteratorExpr) {
4421 PosLVal = CGF.MakeAddrLValue(
4422 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4423 C.getSizeType());
4424 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4425 /*IsInit=*/true);
4426 Pos = &PosLVal;
4427 } else {
4428 Pos = &Idx;
4429 }
4430 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4431 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4432 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4433 CGF.Int8Ty);
4434 return DependenciesArray;
4435}
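// Layout note: deps[0].base_addr holds the element count and the address
// returned above points at deps[1], the first real entry; getDepobjElements()
// recovers the count by indexing back from that pointer with a GEP of -1.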
4436
4437void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4438 SourceLocation Loc) {
4439 ASTContext &C = CGM.getContext();
4440 QualType FlagsTy;
4441 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4442 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4443 C.VoidPtrTy.castAs<PointerType>());
4444 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4446 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4448 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4449 Addr.getElementType(), Addr.emitRawPointer(CGF),
4450 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4451 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4452 CGF.VoidPtrTy);
4453 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4454 // Use default allocator.
4455 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4456 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4457
4458 // __kmpc_free(gtid, addr, nullptr);
4459 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4460 CGM.getModule(), OMPRTL___kmpc_free),
4461 Args);
4462}
4463
4464void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4465 OpenMPDependClauseKind NewDepKind,
4466 SourceLocation Loc) {
4467 ASTContext &C = CGM.getContext();
4468 QualType FlagsTy;
4469 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4470 RecordDecl *KmpDependInfoRD =
4471 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4472 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4473 llvm::Value *NumDeps;
4474 LValue Base;
4475 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4476
4477 Address Begin = Base.getAddress();
4478 // Cast from pointer to array type to pointer to single element.
4479 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4480 Begin.emitRawPointer(CGF), NumDeps);
4481 // The basic structure here is a while-do loop.
4482 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4483 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4484 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4485 CGF.EmitBlock(BodyBB);
4486 llvm::PHINode *ElementPHI =
4487 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4488 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4489 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4490 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4491 Base.getTBAAInfo());
4492 // deps[i].flags = NewDepKind;
4493 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4494 LValue FlagsLVal = CGF.EmitLValueForField(
4495 Base, *std::next(KmpDependInfoRD->field_begin(),
4496 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4497 CGF.EmitStoreOfScalar(
4498 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4499 FlagsLVal);
4500
4501 // Shift the address forward by one element.
4502 llvm::Value *ElementNext =
4503 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4504 .emitRawPointer(CGF);
4505 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4506 llvm::Value *IsEmpty =
4507 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4508 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4509 // Done.
4510 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4511}
4512
4513void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4514 const OMPExecutableDirective &D,
4515 llvm::Function *TaskFunction,
4516 QualType SharedsTy, Address Shareds,
4517 const Expr *IfCond,
4518 const OMPTaskDataTy &Data) {
4519 if (!CGF.HaveInsertPoint())
4520 return;
4521
4522 TaskResultTy Result =
4523 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4524 llvm::Value *NewTask = Result.NewTask;
4525 llvm::Function *TaskEntry = Result.TaskEntry;
4526 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4527 LValue TDBase = Result.TDBase;
4528 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4529 // Process list of dependences.
4530 Address DependenciesArray = Address::invalid();
4531 llvm::Value *NumOfElements;
4532 std::tie(NumOfElements, DependenciesArray) =
4533 emitDependClause(CGF, Data.Dependences, Loc);
4534
4535 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4536 // libcall.
4537 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4538 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4539 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4540 // list is not empty
4541 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4542 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4543 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4544 llvm::Value *DepTaskArgs[7];
4545 if (!Data.Dependences.empty()) {
4546 DepTaskArgs[0] = UpLoc;
4547 DepTaskArgs[1] = ThreadID;
4548 DepTaskArgs[2] = NewTask;
4549 DepTaskArgs[3] = NumOfElements;
4550 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4551 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4552 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4553 }
4554 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4555 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4556 if (!Data.Tied) {
4557 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4558 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4559 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4560 }
4561 if (!Data.Dependences.empty()) {
4562 CGF.EmitRuntimeCall(
4563 OMPBuilder.getOrCreateRuntimeFunction(
4564 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4565 DepTaskArgs);
4566 } else {
4567 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4568 CGM.getModule(), OMPRTL___kmpc_omp_task),
4569 TaskArgs);
4570 }
4571 // Check if parent region is untied and build return for untied task.
4572 if (auto *Region =
4573 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4574 Region->emitUntiedSwitch(CGF);
4575 };
4576
4577 llvm::Value *DepWaitTaskArgs[7];
4578 if (!Data.Dependences.empty()) {
4579 DepWaitTaskArgs[0] = UpLoc;
4580 DepWaitTaskArgs[1] = ThreadID;
4581 DepWaitTaskArgs[2] = NumOfElements;
4582 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4583 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4584 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4585 DepWaitTaskArgs[6] =
4586 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4587 }
4588 auto &M = CGM.getModule();
4589 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4590 TaskEntry, &Data, &DepWaitTaskArgs,
4591 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4592 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4593 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4594 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4595 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4596 // is specified.
4597 if (!Data.Dependences.empty())
4598 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4599 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4600 DepWaitTaskArgs);
4601 // Call proxy_task_entry(gtid, new_task);
4602 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4603 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4604 Action.Enter(CGF);
4605 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4606 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4607 OutlinedFnArgs);
4608 };
4609
4610 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4611 // kmp_task_t *new_task);
4612 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4613 // kmp_task_t *new_task);
4614 RegionCodeGenTy RCG(CodeGen);
4615 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4616 M, OMPRTL___kmpc_omp_task_begin_if0),
4617 TaskArgs,
4618 OMPBuilder.getOrCreateRuntimeFunction(
4619 M, OMPRTL___kmpc_omp_task_complete_if0),
4620 TaskArgs);
4621 RCG.setAction(Action);
4622 RCG(CGF);
4623 };
4624
4625 if (IfCond) {
4626 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4627 } else {
4628 RegionCodeGenTy ThenRCG(ThenCodeGen);
4629 ThenRCG(CGF);
4630 }
4631}
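// A simplified sketch (not from this file) of the if/else split emitted
// above, using the runtime entry points named in this function:
//   #pragma omp task if(c) depend(in : x)
//   // c true:  __kmpc_omp_task_with_deps(loc, gtid, new_task,
//   //                                    ndeps, deps, 0, nullptr);
//   // c false: __kmpc_omp_taskwait_deps_51(loc, gtid, ndeps, deps,
//   //                                      0, nullptr, has_nowait);
//   //          __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//   //          proxy_task_entry(gtid, new_task);   // undeferred execution
//   //          __kmpc_omp_task_complete_if0(loc, gtid, new_task);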
4632
4633void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4634 const OMPLoopDirective &D,
4635 llvm::Function *TaskFunction,
4636 QualType SharedsTy, Address Shareds,
4637 const Expr *IfCond,
4638 const OMPTaskDataTy &Data) {
4639 if (!CGF.HaveInsertPoint())
4640 return;
4641 TaskResultTy Result =
4642 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4643 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4644 // libcall.
4645 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4646 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4647 // sched, kmp_uint64 grainsize, void *task_dup);
4648 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4649 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4650 llvm::Value *IfVal;
4651 if (IfCond) {
4652 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4653 /*isSigned=*/true);
4654 } else {
4655 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4656 }
4657
4658 LValue LBLVal = CGF.EmitLValueForField(
4659 Result.TDBase,
4660 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4661 const auto *LBVar =
4662 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4663 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4664 /*IsInitializer=*/true);
4665 LValue UBLVal = CGF.EmitLValueForField(
4666 Result.TDBase,
4667 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4668 const auto *UBVar =
4669 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4670 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4671 /*IsInitializer=*/true);
4672 LValue StLVal = CGF.EmitLValueForField(
4673 Result.TDBase,
4674 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4675 const auto *StVar =
4676 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4677 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4678 /*IsInitializer=*/true);
4679 // Store reductions address.
4680 LValue RedLVal = CGF.EmitLValueForField(
4681 Result.TDBase,
4682 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4683 if (Data.Reductions) {
4684 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4685 } else {
4686 CGF.EmitNullInitialization(RedLVal.getAddress(),
4687 CGF.getContext().VoidPtrTy);
4688 }
4689 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4690 llvm::Value *TaskArgs[] = {
4691 UpLoc,
4692 ThreadID,
4693 Result.NewTask,
4694 IfVal,
4695 LBLVal.getPointer(CGF),
4696 UBLVal.getPointer(CGF),
4697 CGF.EmitLoadOfScalar(StLVal, Loc),
4698 llvm::ConstantInt::getSigned(
4699 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4700 llvm::ConstantInt::getSigned(
4701 CGF.IntTy, Data.Schedule.getPointer()
4702 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4703 : NoSchedule),
4704 Data.Schedule.getPointer()
4705 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4706 /*isSigned=*/false)
4707 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4708 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4709 Result.TaskDupFn, CGF.VoidPtrTy)
4710 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4711 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4712 CGM.getModule(), OMPRTL___kmpc_taskloop),
4713 TaskArgs);
4714}
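// A sketch (not from this file) of the argument mapping above, assuming a
// grainsize clause with value G:
//   #pragma omp taskloop grainsize(G)
//   // => __kmpc_taskloop(loc, gtid, task, if_val, &lb, &ub, st,
//   //                    /*nogroup=*/1, /*sched=*/Grainsize,
//   //                    /*grainsize=*/(kmp_uint64)G, /*task_dup=*/nullptr);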
4715
4716/// Emit reduction operation for each element of array (required for
4717/// array sections) LHS op = RHS.
4718/// \param Type Type of array.
4719/// \param LHSVar Variable on the left side of the reduction operation
4720/// (references element of array in original variable).
4721/// \param RHSVar Variable on the right side of the reduction operation
4722/// (references element of array in original variable).
4723/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4724/// RHSVar.
4725static void EmitOMPAggregateReduction(
4726 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4727 const VarDecl *RHSVar,
4728 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4729 const Expr *, const Expr *)> &RedOpGen,
4730 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4731 const Expr *UpExpr = nullptr) {
4732 // Perform element-by-element initialization.
4733 QualType ElementTy;
4734 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4735 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4736
4737 // Drill down to the base element type on both arrays.
4738 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4739 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4740
4741 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4742 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4743 // Cast from pointer to array type to pointer to single element.
4744 llvm::Value *LHSEnd =
4745 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4746 // The basic structure here is a while-do loop.
4747 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4748 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4749 llvm::Value *IsEmpty =
4750 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4751 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4752
4753 // Enter the loop body, making that address the current address.
4754 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4755 CGF.EmitBlock(BodyBB);
4756
4757 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4758
4759 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4760 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4761 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4762 Address RHSElementCurrent(
4763 RHSElementPHI, RHSAddr.getElementType(),
4764 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4765
4766 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4767 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4768 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4769 Address LHSElementCurrent(
4770 LHSElementPHI, LHSAddr.getElementType(),
4771 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4772
4773 // Emit copy.
4774 CodeGenFunction::OMPPrivateScope Scope(CGF);
4775 Scope.addPrivate(LHSVar, LHSElementCurrent);
4776 Scope.addPrivate(RHSVar, RHSElementCurrent);
4777 Scope.Privatize();
4778 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4779 Scope.ForceCleanup();
4780
4781 // Shift the address forward by one element.
4782 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4783 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4784 "omp.arraycpy.dest.element");
4785 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4786 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4787 "omp.arraycpy.src.element");
4788 // Check whether we've reached the end.
4789 llvm::Value *Done =
4790 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4791 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4792 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4793 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4794
4795 // Done.
4796 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4797}
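// A sketch (not from this file) of the loop this helper emits for an array
// reduction; T and n stand in for the element type and section length:
//   #pragma omp parallel for reduction(+ : a[0:n])
//   // => T *lhs = a, *rhs = a_priv, *end = a + n;
//   //    while (lhs != end) {      // omp.arraycpy.body
//   //      *lhs = *lhs + *rhs;     // RedOpGen expands the combiner here
//   //      ++lhs; ++rhs;
//   //    }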
4798
4799/// Emit reduction combiner. If the combiner is a simple expression emit it as
4800/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4801/// UDR combiner function.
4802static void emitReductionCombiner(CodeGenFunction &CGF,
4803 const Expr *ReductionOp) {
4804 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4805 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4806 if (const auto *DRE =
4807 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4808 if (const auto *DRD =
4809 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4810 std::pair<llvm::Function *, llvm::Function *> Reduction =
4811 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4812 RValue Func = RValue::get(Reduction.first);
4813 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4814 CGF.EmitIgnoredExpr(ReductionOp);
4815 return;
4816 }
4817 CGF.EmitIgnoredExpr(ReductionOp);
4818}
4819
4820llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4821 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4822 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4823 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4824 ASTContext &C = CGM.getContext();
4825
4826 // void reduction_func(void *LHSArg, void *RHSArg);
4827 FunctionArgList Args;
4828 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4829 ImplicitParamKind::Other);
4830 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4831 ImplicitParamKind::Other);
4832 Args.push_back(&LHSArg);
4833 Args.push_back(&RHSArg);
4834 const auto &CGFI =
4835 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4836 std::string Name = getReductionFuncName(ReducerName);
4837 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4838 llvm::GlobalValue::InternalLinkage, Name,
4839 &CGM.getModule());
4840 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4841 Fn->setDoesNotRecurse();
4842 CodeGenFunction CGF(CGM);
4843 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4844
4845 // Dst = (void*[n])(LHSArg);
4846 // Src = (void*[n])(RHSArg);
4847 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4848 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4849 ArgsElemType->getPointerTo()),
4850 ArgsElemType, CGF.getPointerAlign());
4851 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4852 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4853 ArgsElemType->getPointerTo()),
4854 ArgsElemType, CGF.getPointerAlign());
4855
4856 // ...
4857 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4858 // ...
4859 CodeGenFunction::OMPPrivateScope Scope(CGF);
4860 const auto *IPriv = Privates.begin();
4861 unsigned Idx = 0;
4862 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4863 const auto *RHSVar =
4864 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4865 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4866 const auto *LHSVar =
4867 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4868 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4869 QualType PrivTy = (*IPriv)->getType();
4870 if (PrivTy->isVariablyModifiedType()) {
4871 // Get array size and emit VLA type.
4872 ++Idx;
4873 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4874 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4875 const VariableArrayType *VLA =
4876 CGF.getContext().getAsVariableArrayType(PrivTy);
4877 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4878 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4879 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4880 CGF.EmitVariablyModifiedType(PrivTy);
4881 }
4882 }
4883 Scope.Privatize();
4884 IPriv = Privates.begin();
4885 const auto *ILHS = LHSExprs.begin();
4886 const auto *IRHS = RHSExprs.begin();
4887 for (const Expr *E : ReductionOps) {
4888 if ((*IPriv)->getType()->isArrayType()) {
4889 // Emit reduction for array section.
4890 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4891 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4892 EmitOMPAggregateReduction(
4893 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4894 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4895 emitReductionCombiner(CGF, E);
4896 });
4897 } else {
4898 // Emit reduction for array subscript or single variable.
4899 emitReductionCombiner(CGF, E);
4900 }
4901 ++IPriv;
4902 ++ILHS;
4903 ++IRHS;
4904 }
4905 Scope.ForceCleanup();
4906 CGF.FinishFunction();
4907 return Fn;
4908}
4909
4910void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4911 const Expr *ReductionOp,
4912 const Expr *PrivateRef,
4913 const DeclRefExpr *LHS,
4914 const DeclRefExpr *RHS) {
4915 if (PrivateRef->getType()->isArrayType()) {
4916 // Emit reduction for array section.
4917 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4918 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4919 EmitOMPAggregateReduction(
4920 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4921 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4922 emitReductionCombiner(CGF, ReductionOp);
4923 });
4924 } else {
4925 // Emit reduction for array subscript or single variable.
4926 emitReductionCombiner(CGF, ReductionOp);
4927 }
4928}
4929
4930void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4931 ArrayRef<const Expr *> Privates,
4932 ArrayRef<const Expr *> LHSExprs,
4933 ArrayRef<const Expr *> RHSExprs,
4934 ArrayRef<const Expr *> ReductionOps,
4935 ReductionOptionsTy Options) {
4936 if (!CGF.HaveInsertPoint())
4937 return;
4938
4939 bool WithNowait = Options.WithNowait;
4940 bool SimpleReduction = Options.SimpleReduction;
4941
4942 // The following code should be emitted for reduction:
4943 //
4944 // static kmp_critical_name lock = { 0 };
4945 //
4946 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4947 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4948 // ...
4949 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4950 // *(Type<n>-1*)rhs[<n>-1]);
4951 // }
4952 //
4953 // ...
4954 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4955 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4956 // RedList, reduce_func, &<lock>)) {
4957 // case 1:
4958 // ...
4959 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4960 // ...
4961 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4962 // break;
4963 // case 2:
4964 // ...
4965 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4966 // ...
4967 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4968 // break;
4969 // default:;
4970 // }
4971 //
4972 // if SimpleReduction is true, only the following code is generated:
4973 // ...
4974 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4975 // ...
4976
4977 ASTContext &C = CGM.getContext();
4978
4979 if (SimpleReduction) {
4980 CodeGenFunction::RunCleanupsScope Scope(CGF);
4981 const auto *IPriv = Privates.begin();
4982 const auto *ILHS = LHSExprs.begin();
4983 const auto *IRHS = RHSExprs.begin();
4984 for (const Expr *E : ReductionOps) {
4985 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4986 cast<DeclRefExpr>(*IRHS));
4987 ++IPriv;
4988 ++ILHS;
4989 ++IRHS;
4990 }
4991 return;
4992 }
4993
4994 // 1. Build a list of reduction variables.
4995 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4996 auto Size = RHSExprs.size();
4997 for (const Expr *E : Privates) {
4998 if (E->getType()->isVariablyModifiedType())
4999 // Reserve place for array size.
5000 ++Size;
5001 }
5002 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5003 QualType ReductionArrayTy = C.getConstantArrayType(
5004 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5005 /*IndexTypeQuals=*/0);
5006 RawAddress ReductionList =
5007 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5008 const auto *IPriv = Privates.begin();
5009 unsigned Idx = 0;
5010 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5011 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5012 CGF.Builder.CreateStore(
5013 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5014 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5015 Elem);
5016 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5017 // Store array size.
5018 ++Idx;
5019 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5020 llvm::Value *Size = CGF.Builder.CreateIntCast(
5021 CGF.getVLASize(
5022 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5023 .NumElts,
5024 CGF.SizeTy, /*isSigned=*/false);
5025 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5026 Elem);
5027 }
5028 }
5029
5030 // 2. Emit reduce_func().
5031 llvm::Function *ReductionFn = emitReductionFunction(
5032 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5033 Privates, LHSExprs, RHSExprs, ReductionOps);
5034
5035 // 3. Create static kmp_critical_name lock = { 0 };
5036 std::string Name = getName({"reduction"});
5037 llvm::Value *Lock = getCriticalRegionLock(Name);
5038
5039 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5040 // RedList, reduce_func, &<lock>);
5041 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5042 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5043 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5044 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5045 ReductionList.getPointer(), CGF.VoidPtrTy);
5046 llvm::Value *Args[] = {
5047 IdentTLoc, // ident_t *<loc>
5048 ThreadId, // i32 <gtid>
5049 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5050 ReductionArrayTySize, // size_type sizeof(RedList)
5051 RL, // void *RedList
5052 ReductionFn, // void (*) (void *, void *) <reduce_func>
5053 Lock // kmp_critical_name *&<lock>
5054 };
5055 llvm::Value *Res = CGF.EmitRuntimeCall(
5056 OMPBuilder.getOrCreateRuntimeFunction(
5057 CGM.getModule(),
5058 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5059 Args);
5060
5061 // 5. Build switch(res)
5062 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5063 llvm::SwitchInst *SwInst =
5064 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5065
5066 // 6. Build case 1:
5067 // ...
5068 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5069 // ...
5070 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5071 // break;
5072 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5073 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5074 CGF.EmitBlock(Case1BB);
5075
5076 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5077 llvm::Value *EndArgs[] = {
5078 IdentTLoc, // ident_t *<loc>
5079 ThreadId, // i32 <gtid>
5080 Lock // kmp_critical_name *&<lock>
5081 };
5082 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5083 CodeGenFunction &CGF, PrePostActionTy &Action) {
5084 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5085 const auto *IPriv = Privates.begin();
5086 const auto *ILHS = LHSExprs.begin();
5087 const auto *IRHS = RHSExprs.begin();
5088 for (const Expr *E : ReductionOps) {
5089 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5090 cast<DeclRefExpr>(*IRHS));
5091 ++IPriv;
5092 ++ILHS;
5093 ++IRHS;
5094 }
5095 };
5096 RegionCodeGenTy RCG(CodeGen);
5097 CommonActionTy Action(
5098 nullptr, std::nullopt,
5099 OMPBuilder.getOrCreateRuntimeFunction(
5100 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5101 : OMPRTL___kmpc_end_reduce),
5102 EndArgs);
5103 RCG.setAction(Action);
5104 RCG(CGF);
5105
5106 CGF.EmitBranch(DefaultBB);
5107
5108 // 7. Build case 2:
5109 // ...
5110 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5111 // ...
5112 // break;
5113 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5114 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5115 CGF.EmitBlock(Case2BB);
5116
5117 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5118 CodeGenFunction &CGF, PrePostActionTy &Action) {
5119 const auto *ILHS = LHSExprs.begin();
5120 const auto *IRHS = RHSExprs.begin();
5121 const auto *IPriv = Privates.begin();
5122 for (const Expr *E : ReductionOps) {
5123 const Expr *XExpr = nullptr;
5124 const Expr *EExpr = nullptr;
5125 const Expr *UpExpr = nullptr;
5126 BinaryOperatorKind BO = BO_Comma;
5127 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5128 if (BO->getOpcode() == BO_Assign) {
5129 XExpr = BO->getLHS();
5130 UpExpr = BO->getRHS();
5131 }
5132 }
5133 // Try to emit update expression as a simple atomic.
5134 const Expr *RHSExpr = UpExpr;
5135 if (RHSExpr) {
5136 // Analyze RHS part of the whole expression.
5137 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5138 RHSExpr->IgnoreParenImpCasts())) {
5139 // If this is a conditional operator, analyze its condition for
5140 // min/max reduction operator.
5141 RHSExpr = ACO->getCond();
5142 }
5143 if (const auto *BORHS =
5144 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5145 EExpr = BORHS->getRHS();
5146 BO = BORHS->getOpcode();
5147 }
5148 }
5149 if (XExpr) {
5150 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5151 auto &&AtomicRedGen = [BO, VD,
5152 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5153 const Expr *EExpr, const Expr *UpExpr) {
5154 LValue X = CGF.EmitLValue(XExpr);
5155 RValue E;
5156 if (EExpr)
5157 E = CGF.EmitAnyExpr(EExpr);
5158 CGF.EmitOMPAtomicSimpleUpdateExpr(
5159 X, E, BO, /*IsXLHSInRHSPart=*/true,
5160 llvm::AtomicOrdering::Monotonic, Loc,
5161 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5162 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5163 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5164 CGF.emitOMPSimpleStore(
5165 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5166 VD->getType().getNonReferenceType(), Loc);
5167 PrivateScope.addPrivate(VD, LHSTemp);
5168 (void)PrivateScope.Privatize();
5169 return CGF.EmitAnyExpr(UpExpr);
5170 });
5171 };
5172 if ((*IPriv)->getType()->isArrayType()) {
5173 // Emit atomic reduction for array section.
5174 const auto *RHSVar =
5175 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5176 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5177 AtomicRedGen, XExpr, EExpr, UpExpr);
5178 } else {
5179 // Emit atomic reduction for array subscript or single variable.
5180 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5181 }
5182 } else {
5183 // Emit as a critical region.
5184 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5185 const Expr *, const Expr *) {
5186 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5187 std::string Name = RT.getName({"atomic_reduction"});
5188 RT.emitCriticalRegion(
5189 CGF, Name,
5190 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5191 Action.Enter(CGF);
5192 emitReductionCombiner(CGF, E);
5193 },
5194 Loc);
5195 };
5196 if ((*IPriv)->getType()->isArrayType()) {
5197 const auto *LHSVar =
5198 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5199 const auto *RHSVar =
5200 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5201 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5202 CritRedGen);
5203 } else {
5204 CritRedGen(CGF, nullptr, nullptr, nullptr);
5205 }
5206 }
5207 ++ILHS;
5208 ++IRHS;
5209 ++IPriv;
5210 }
5211 };
5212 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5213 if (!WithNowait) {
5214 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5215 llvm::Value *EndArgs[] = {
5216 IdentTLoc, // ident_t *<loc>
5217 ThreadId, // i32 <gtid>
5218 Lock // kmp_critical_name *&<lock>
5219 };
5220 CommonActionTy Action(nullptr, std::nullopt,
5221 OMPBuilder.getOrCreateRuntimeFunction(
5222 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5223 EndArgs);
5224 AtomicRCG.setAction(Action);
5225 AtomicRCG(CGF);
5226 } else {
5227 AtomicRCG(CGF);
5228 }
5229
5230 CGF.EmitBranch(DefaultBB);
5231 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5232}
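// A sketch (not from this file) of a directive that reaches this function,
// assuming a scalar '+' reduction on 'sum':
//   #pragma omp parallel for reduction(+ : sum)
//   // case 1: sum = sum + sum_priv;            // under the runtime lock,
//   //         __kmpc_end_reduce{_nowait}(...)  // then release
//   // case 2: #pragma omp atomic
//   //         sum += sum_priv;                 // lock-free combination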
5233
5234/// Generates unique name for artificial threadprivate variables.
5235/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5236static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5237 const Expr *Ref) {
5238 SmallString<256> Buffer;
5239 llvm::raw_svector_ostream Out(Buffer);
5240 const clang::DeclRefExpr *DE;
5241 const VarDecl *D = ::getBaseDecl(Ref, DE);
5242 if (!D)
5243 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5244 D = D->getCanonicalDecl();
5245 std::string Name = CGM.getOpenMPRuntime().getName(
5246 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5247 Out << Prefix << Name << "_"
5248 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5249 return std::string(Out.str());
5250}
5251
5252/// Emits reduction initializer function:
5253/// \code
5254/// void @.red_init(void* %arg, void* %orig) {
5255/// %0 = bitcast void* %arg to <type>*
5256/// store <type> <init>, <type>* %0
5257/// ret void
5258/// }
5259/// \endcode
5260static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5261 SourceLocation Loc,
5262 ReductionCodeGen &RCG, unsigned N) {
5263 ASTContext &C = CGM.getContext();
5264 QualType VoidPtrTy = C.VoidPtrTy;
5265 VoidPtrTy.addRestrict();
5266 FunctionArgList Args;
5267 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5268 ImplicitParamKind::Other);
5269 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5270 ImplicitParamKind::Other);
5271 Args.emplace_back(&Param);
5272 Args.emplace_back(&ParamOrig);
5273 const auto &FnInfo =
5274 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5275 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5276 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5277 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5278 Name, &CGM.getModule());
5279 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5280 Fn->setDoesNotRecurse();
5281 CodeGenFunction CGF(CGM);
5282 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5283 QualType PrivateType = RCG.getPrivateType(N);
5284 Address PrivateAddr = CGF.EmitLoadOfPointer(
5285 CGF.GetAddrOfLocalVar(&Param).withElementType(
5286 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5287 C.getPointerType(PrivateType)->castAs<PointerType>());
5288 llvm::Value *Size = nullptr;
5289 // If the size of the reduction item is non-constant, load it from global
5290 // threadprivate variable.
5291 if (RCG.getSizes(N).second) {
5292 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5293 CGF, CGM.getContext().getSizeType(),
5294 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5295 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5296 CGM.getContext().getSizeType(), Loc);
5297 }
5298 RCG.emitAggregateType(CGF, N, Size);
5299 Address OrigAddr = Address::invalid();
5300 // If the initializer uses the initializer from a declare reduction construct,
5301 // emit a pointer to the address of the original reduction item (required by
5302 // the reduction initializer).
5303 if (RCG.usesReductionInitializer(N)) {
5304 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5305 OrigAddr = CGF.EmitLoadOfPointer(
5306 SharedAddr,
5307 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5308 }
5309 // Emit the initializer:
5310 // %0 = bitcast void* %arg to <type>*
5311 // store <type> <init>, <type>* %0
5312 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5313 [](CodeGenFunction &) { return false; });
5314 CGF.FinishFunction();
5315 return Fn;
5316}
5317
5318/// Emits reduction combiner function:
5319/// \code
5320/// void @.red_comb(void* %arg0, void* %arg1) {
5321/// %lhs = bitcast void* %arg0 to <type>*
5322/// %rhs = bitcast void* %arg1 to <type>*
5323/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5324/// store <type> %2, <type>* %lhs
5325/// ret void
5326/// }
5327/// \endcode
5328static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5329 SourceLocation Loc,
5330 ReductionCodeGen &RCG, unsigned N,
5331 const Expr *ReductionOp,
5332 const Expr *LHS, const Expr *RHS,
5333 const Expr *PrivateRef) {
5334 ASTContext &C = CGM.getContext();
5335 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5336 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5337 FunctionArgList Args;
5338 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5339 C.VoidPtrTy, ImplicitParamKind::Other);
5340 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5341 ImplicitParamKind::Other);
5342 Args.emplace_back(&ParamInOut);
5343 Args.emplace_back(&ParamIn);
5344 const auto &FnInfo =
5345 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5346 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5347 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5348 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5349 Name, &CGM.getModule());
5350 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5351 Fn->setDoesNotRecurse();
5352 CodeGenFunction CGF(CGM);
5353 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5354 llvm::Value *Size = nullptr;
5355 // If the size of the reduction item is non-constant, load it from global
5356 // threadprivate variable.
5357 if (RCG.getSizes(N).second) {
5358 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5359 CGF, CGM.getContext().getSizeType(),
5360 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5361 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5362 CGM.getContext().getSizeType(), Loc);
5363 }
5364 RCG.emitAggregateType(CGF, N, Size);
5365 // Remap lhs and rhs variables to the addresses of the function arguments.
5366 // %lhs = bitcast void* %arg0 to <type>*
5367 // %rhs = bitcast void* %arg1 to <type>*
5368 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5369 PrivateScope.addPrivate(
5370 LHSVD,
5371 // Pull out the pointer to the variable.
5372 CGF.EmitLoadOfPointer(
5373 CGF.GetAddrOfLocalVar(&ParamInOut)
5374 .withElementType(
5375 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5376 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5377 PrivateScope.addPrivate(
5378 RHSVD,
5379 // Pull out the pointer to the variable.
5380 CGF.EmitLoadOfPointer(
5381 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5382 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5383 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5384 PrivateScope.Privatize();
5385 // Emit the combiner body:
5386 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5387 // store <type> %2, <type>* %lhs
5388 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5389 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5390 cast<DeclRefExpr>(RHS));
5391 CGF.FinishFunction();
5392 return Fn;
5393}
5394
5395/// Emits reduction finalizer function:
5396/// \code
5397/// void @.red_fini(void* %arg) {
5398/// %0 = bitcast void* %arg to <type>*
5399/// <destroy>(<type>* %0)
5400/// ret void
5401/// }
5402/// \endcode
5403static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5404 SourceLocation Loc,
5405 ReductionCodeGen &RCG, unsigned N) {
5406 if (!RCG.needCleanups(N))
5407 return nullptr;
5408 ASTContext &C = CGM.getContext();
5409 FunctionArgList Args;
5410 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5411 ImplicitParamKind::Other);
5412 Args.emplace_back(&Param);
5413 const auto &FnInfo =
5414 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5415 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5416 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5417 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5418 Name, &CGM.getModule());
5419 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5420 Fn->setDoesNotRecurse();
5421 CodeGenFunction CGF(CGM);
5422 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5423 Address PrivateAddr = CGF.EmitLoadOfPointer(
5424 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5425 llvm::Value *Size = nullptr;
5426 // If the size of the reduction item is non-constant, load it from global
5427 // threadprivate variable.
5428 if (RCG.getSizes(N).second) {
5429 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5430 CGF, CGM.getContext().getSizeType(),
5431 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5432 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5433 CGM.getContext().getSizeType(), Loc);
5434 }
5435 RCG.emitAggregateType(CGF, N, Size);
5436 // Emit the finalizer body:
5437 // <destroy>(<type>* %0)
5438 RCG.emitCleanups(CGF, N, PrivateAddr);
5439 CGF.FinishFunction(Loc);
5440 return Fn;
5441}
5442
5443llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5444 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5445 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5446 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5447 return nullptr;
5448
5449 // Build typedef struct:
5450 // kmp_taskred_input {
5451 // void *reduce_shar; // shared reduction item
5452 // void *reduce_orig; // original reduction item used for initialization
5453 // size_t reduce_size; // size of data item
5454 // void *reduce_init; // data initialization routine
5455 // void *reduce_fini; // data finalization routine
5456 // void *reduce_comb; // data combiner routine
5457 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5458 // } kmp_taskred_input_t;
5459 ASTContext &C = CGM.getContext();
5460 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5461 RD->startDefinition();
5462 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5463 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5464 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5465 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5466 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5467 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5468 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5469 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5470 RD->completeDefinition();
5471 QualType RDType = C.getRecordType(RD);
5472 unsigned Size = Data.ReductionVars.size();
5473 llvm::APInt ArraySize(/*numBits=*/64, Size);
5474 QualType ArrayRDType =
5475 C.getConstantArrayType(RDType, ArraySize, nullptr,
5476 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5477 // kmp_task_red_input_t .rd_input.[Size];
5478 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5479 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5480 Data.ReductionCopies, Data.ReductionOps);
5481 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5482 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5483 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5484 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5485 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5486 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5487 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5488 ".rd_input.gep.");
5489 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5490 // ElemLVal.reduce_shar = &Shareds[Cnt];
5491 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5492 RCG.emitSharedOrigLValue(CGF, Cnt);
5493 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5494 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5495 // ElemLVal.reduce_orig = &Origs[Cnt];
5496 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5497 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5498 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5499 RCG.emitAggregateType(CGF, Cnt);
5500 llvm::Value *SizeValInChars;
5501 llvm::Value *SizeVal;
5502 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5503 // We use delayed creation/initialization for VLAs and array sections. It is
5504 // required because the runtime does not provide a way to pass the sizes of
5505 // VLAs/array sections to the initializer/combiner/finalizer functions. Instead,
5506 // threadprivate global variables are used to store these values and make them
5507 // available to those functions.
5508 bool DelayedCreation = !!SizeVal;
5509 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5510 /*isSigned=*/false);
5511 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5512 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5513 // ElemLVal.reduce_init = init;
5514 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5515 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5516 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5517 // ElemLVal.reduce_fini = fini;
5518 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5519 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5520 llvm::Value *FiniAddr =
5521 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5522 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5523 // ElemLVal.reduce_comb = comb;
5524 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5525 llvm::Value *CombAddr = emitReduceCombFunction(
5526 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5527 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5528 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5529 // ElemLVal.flags = 0;
5530 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5531 if (DelayedCreation) {
5532 CGF.EmitStoreOfScalar(
5533 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5534 FlagsLVal);
5535 } else
5536 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5537 }
5538 if (Data.IsReductionWithTaskMod) {
5539 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5540 // is_ws, int num, void *data);
5541 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5542 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5543 CGM.IntTy, /*isSigned=*/true);
5544 llvm::Value *Args[] = {
5545 IdentTLoc, GTid,
5546 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5547 /*isSigned=*/true),
5548 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5549 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5550 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5551 return CGF.EmitRuntimeCall(
5552 OMPBuilder.getOrCreateRuntimeFunction(
5553 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5554 Args);
5555 }
5556 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5557 llvm::Value *Args[] = {
5558 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5559 /*isSigned=*/true),
5560 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5561 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5562 CGM.VoidPtrTy)};
5563 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5564 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5565 Args);
5566}
5567
5568void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5569 SourceLocation Loc,
5570 bool IsWorksharingReduction) {
5571 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5572 // is_ws, int num, void *data);
5573 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5574 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5575 CGM.IntTy, /*isSigned=*/true);
5576 llvm::Value *Args[] = {IdentTLoc, GTid,
5577 llvm::ConstantInt::get(CGM.IntTy,
5578 IsWorksharingReduction ? 1 : 0,
5579 /*isSigned=*/true)};
5580 (void)CGF.EmitRuntimeCall(
5581 OMPBuilder.getOrCreateRuntimeFunction(
5582 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5583 Args);
5584}
5585
5586void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5587 SourceLocation Loc,
5588 ReductionCodeGen &RCG,
5589 unsigned N) {
5590 auto Sizes = RCG.getSizes(N);
5591 // Emit threadprivate global variable if the type is non-constant
5592 // (Sizes.second = nullptr).
5593 if (Sizes.second) {
5594 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5595 /*isSigned=*/false);
5596 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5597 CGF, CGM.getContext().getSizeType(),
5598 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5599 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5600 }
5601}
5602
5603Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5604 SourceLocation Loc,
5605 llvm::Value *ReductionsPtr,
5606 LValue SharedLVal) {
5607 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5608 // *d);
5609 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5610 CGM.IntTy,
5611 /*isSigned=*/true),
5612 ReductionsPtr,
5613 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5614 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5615 return Address(
5616 CGF.EmitRuntimeCall(
5617 OMPBuilder.getOrCreateRuntimeFunction(
5618 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5619 Args),
5620 CGF.Int8Ty, SharedLVal.getAlignment());
5621}
5622
5623void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5624 const OMPTaskDataTy &Data) {
5625 if (!CGF.HaveInsertPoint())
5626 return;
5627
5628 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5629 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5630 OMPBuilder.createTaskwait(CGF.Builder);
5631 } else {
5632 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5633 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5634 auto &M = CGM.getModule();
5635 Address DependenciesArray = Address::invalid();
5636 llvm::Value *NumOfElements;
5637 std::tie(NumOfElements, DependenciesArray) =
5638 emitDependClause(CGF, Data.Dependences, Loc);
5639 if (!Data.Dependences.empty()) {
5640 llvm::Value *DepWaitTaskArgs[7];
5641 DepWaitTaskArgs[0] = UpLoc;
5642 DepWaitTaskArgs[1] = ThreadID;
5643 DepWaitTaskArgs[2] = NumOfElements;
5644 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5645 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5646 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5647 DepWaitTaskArgs[6] =
5648 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5649
5650 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5651
5652 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5653 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5654 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5655 // kmp_int32 has_no_wait); if dependence info is specified.
5656 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5657 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5658 DepWaitTaskArgs);
5659
5660 } else {
5661
5662 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5663 // global_tid);
5664 llvm::Value *Args[] = {UpLoc, ThreadID};
5665 // Ignore return result until untied tasks are supported.
5666 CGF.EmitRuntimeCall(
5667 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5668 Args);
5669 }
5670 }
5671
5672 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5673 Region->emitUntiedSwitch(CGF);
5674}
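// A sketch (not from this file) of the two branches above:
//   #pragma omp taskwait depend(in : x)
//   // => __kmpc_omp_taskwait_deps_51(loc, gtid, ndeps, deps, 0, nullptr,
//   //                                has_nowait);
//   #pragma omp taskwait
//   // => __kmpc_omp_taskwait(loc, gtid)   (or OMPBuilder.createTaskwait)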
5675
5676void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5677 OpenMPDirectiveKind InnerKind,
5678 const RegionCodeGenTy &CodeGen,
5679 bool HasCancel) {
5680 if (!CGF.HaveInsertPoint())
5681 return;
5682 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5683 InnerKind != OMPD_critical &&
5684 InnerKind != OMPD_master &&
5685 InnerKind != OMPD_masked);
5686 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5687}
5688
5689namespace {
5690enum RTCancelKind {
5691 CancelNoreq = 0,
5692 CancelParallel = 1,
5693 CancelLoop = 2,
5694 CancelSections = 3,
5695 CancelTaskgroup = 4
5696};
5697} // anonymous namespace
5698
5699static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5700 RTCancelKind CancelKind = CancelNoreq;
5701 if (CancelRegion == OMPD_parallel)
5702 CancelKind = CancelParallel;
5703 else if (CancelRegion == OMPD_for)
5704 CancelKind = CancelLoop;
5705 else if (CancelRegion == OMPD_sections)
5706 CancelKind = CancelSections;
5707 else {
5708 assert(CancelRegion == OMPD_taskgroup);
5709 CancelKind = CancelTaskgroup;
5710 }
5711 return CancelKind;
5712}
5713
5714void CGOpenMPRuntime::emitCancellationPointCall(
5715 CodeGenFunction &CGF, SourceLocation Loc,
5716 OpenMPDirectiveKind CancelRegion) {
5717 if (!CGF.HaveInsertPoint())
5718 return;
5719 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5720 // global_tid, kmp_int32 cncl_kind);
5721 if (auto *OMPRegionInfo =
5722 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5723 // For 'cancellation point taskgroup', the task region info may not have a
5724 // cancel. This may instead happen in another adjacent task.
5725 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5726 llvm::Value *Args[] = {
5727 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5728 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5729 // Ignore return result until untied tasks are supported.
5730 llvm::Value *Result = CGF.EmitRuntimeCall(
5731 OMPBuilder.getOrCreateRuntimeFunction(
5732 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5733 Args);
5734 // if (__kmpc_cancellationpoint()) {
5735 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5736 // exit from construct;
5737 // }
5738 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5739 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5740 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5741 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5742 CGF.EmitBlock(ExitBB);
5743 if (CancelRegion == OMPD_parallel)
5744 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5745 // exit from construct;
5746 CodeGenFunction::JumpDest CancelDest =
5747 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5748 CGF.EmitBranchThroughCleanup(CancelDest);
5749 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5750 }
5751 }
5752}
5753
5754void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5755 const Expr *IfCond,
5756 OpenMPDirectiveKind CancelRegion) {
5757 if (!CGF.HaveInsertPoint())
5758 return;
5759 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5760 // kmp_int32 cncl_kind);
5761 auto &M = CGM.getModule();
5762 if (auto *OMPRegionInfo =
5763 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5764 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5765 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5766 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5767 llvm::Value *Args[] = {
5768 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5769 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5770 // Ignore return result until untied tasks are supported.
5771 llvm::Value *Result = CGF.EmitRuntimeCall(
5772 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5773 // if (__kmpc_cancel()) {
5774 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5775 // exit from construct;
5776 // }
5777 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5778 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5779 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5780 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5781 CGF.EmitBlock(ExitBB);
5782 if (CancelRegion == OMPD_parallel)
5783 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5784 // exit from construct;
5785 CodeGenFunction::JumpDest CancelDest =
5786 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5787 CGF.EmitBranchThroughCleanup(CancelDest);
5788 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5789 };
5790 if (IfCond) {
5791 emitIfClause(CGF, IfCond, ThenGen,
5792 [](CodeGenFunction &, PrePostActionTy &) {});
5793 } else {
5794 RegionCodeGenTy ThenRCG(ThenGen);
5795 ThenRCG(CGF);
5796 }
5797 }
5798}
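// A simplified sketch (not from this file) of the guarded emission above:
//   #pragma omp cancel parallel if(c)
//   // => if (c && __kmpc_cancel(loc, gtid, CancelParallel)) {
//   //      // barrier for parallel cancellation, then branch through
//   //      // cleanups to the construct's cancel destination
//   //    }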
5799
5800namespace {
5801/// Cleanup action for uses_allocators support.
5802class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5803 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5804
5805public:
5806 OMPUsesAllocatorsActionTy(
5807 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5808 : Allocators(Allocators) {}
5809 void Enter(CodeGenFunction &CGF) override {
5810 if (!CGF.HaveInsertPoint())
5811 return;
5812 for (const auto &AllocatorData : Allocators) {
5813 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5814 CGF, AllocatorData.first, AllocatorData.second);
5815 }
5816 }
5817 void Exit(CodeGenFunction &CGF) override {
5818 if (!CGF.HaveInsertPoint())
5819 return;
5820 for (const auto &AllocatorData : Allocators) {
5821 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5822 AllocatorData.first);
5823 }
5824 }
5825};
5826} // namespace
5827
5828void CGOpenMPRuntime::emitTargetOutlinedFunction(
5829 const OMPExecutableDirective &D, StringRef ParentName,
5830 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5831 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5832 assert(!ParentName.empty() && "Invalid target entry parent name!");
5833 HasEmittedTargetRegion = true;
5834 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5835 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5836 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5837 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5838 if (!D.AllocatorTraits)
5839 continue;
5840 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5841 }
5842 }
5843 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5844 CodeGen.setAction(UsesAllocatorAction);
5845 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5846 IsOffloadEntry, CodeGen);
5847}
5848
5849void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5850 const Expr *Allocator,
5851 const Expr *AllocatorTraits) {
5852 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5853 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5854 // Use default memspace handle.
5855 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5856 llvm::Value *NumTraits = llvm::ConstantInt::get(
5857 CGF.IntTy, cast<ConstantArrayType>(
5858 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5859 ->getSize()
5860 .getLimitedValue());
5861 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5862 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5863 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5864 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5865 AllocatorTraitsLVal.getBaseInfo(),
5866 AllocatorTraitsLVal.getTBAAInfo());
5867 llvm::Value *Traits = Addr.emitRawPointer(CGF);
5868
5869 llvm::Value *AllocatorVal =
5870 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5871 CGM.getModule(), OMPRTL___kmpc_init_allocator),
5872 {ThreadId, MemSpaceHandle, NumTraits, Traits});
5873 // Store to allocator.
5874 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5875 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5876 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5877 AllocatorVal =
5878 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5879 Allocator->getType(), Allocator->getExprLoc());
5880 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5881}
5882
5883void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5884 const Expr *Allocator) {
5885 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5886 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5887 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5888 llvm::Value *AllocatorVal =
5889 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5890 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5891 CGF.getContext().VoidPtrTy,
5892 Allocator->getExprLoc());
5893 (void)CGF.EmitRuntimeCall(
5894 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5895 OMPRTL___kmpc_destroy_allocator),
5896 {ThreadId, AllocatorVal});
5897}
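// A sketch (not from this file), with 'my_alloc' and 'my_traits' as
// hypothetical user declarations:
//   #pragma omp target uses_allocators(my_alloc(my_traits))
//   // entry: my_alloc = __kmpc_init_allocator(gtid, /*memspace=*/nullptr,
//   //                                         ntraits, traits);
//   // exit:  __kmpc_destroy_allocator(gtid, my_alloc);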
5898
5899void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5900 const OMPExecutableDirective &D, CodeGenFunction &CGF,
5901 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5902 int32_t &MaxTeamsVal) {
5903
5904 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5905 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5906 /*UpperBoundOnly=*/true);
5907
5908 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5909 for (auto *A : C->getAttrs()) {
5910 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5911 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5912 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5913 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5914 &AttrMinBlocksVal, &AttrMaxBlocksVal);
5915 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5916 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5917 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5918 &AttrMaxThreadsVal);
5919 else
5920 continue;
5921
5922 MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
5923 if (AttrMaxThreadsVal > 0)
5924 MaxThreadsVal = MaxThreadsVal > 0
5925 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5926 : AttrMaxThreadsVal;
5927 MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
5928 if (AttrMaxBlocksVal > 0)
5929 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5930 : AttrMaxBlocksVal;
5931 }
5932 }
5933}
5934
5935void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5936 const OMPExecutableDirective &D, StringRef ParentName,
5937 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5938 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5939
5940 llvm::TargetRegionEntryInfo EntryInfo =
5941 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
5942
5943 CodeGenFunction CGF(CGM, true);
5944 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5945 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5946 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5947
5948 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5949 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5950 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
5951 };
5952
5953 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
5954 IsOffloadEntry, OutlinedFn, OutlinedFnID);
5955
5956 if (!OutlinedFn)
5957 return;
5958
5959 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
5960
5961 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5962 for (auto *A : C->getAttrs()) {
5963 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5964 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5965 }
5966 }
5967}
5968
5969/// Checks if the expression is constant or does not have non-trivial function
5970/// calls.
5971static bool isTrivial(ASTContext &Ctx, const Expr * E) {
5972 // We can skip constant expressions.
5973 // We can skip expressions with trivial calls or simple expressions.
5974 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5975 !E->hasNonTrivialCall(Ctx)) &&
5976 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5977}
5978
5979const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5980 const Stmt *Body) {
5981 const Stmt *Child = Body->IgnoreContainers();
5982 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
5983 Child = nullptr;
5984 for (const Stmt *S : C->body()) {
5985 if (const auto *E = dyn_cast<Expr>(S)) {
5986 if (isTrivial(Ctx, E))
5987 continue;
5988 }
5989 // Some of the statements can be ignored.
5990 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
5991 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
5992 continue;
5993 // Analyze declarations.
5994 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
5995 if (llvm::all_of(DS->decls(), [](const Decl *D) {
5996 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
5997 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
5998 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
5999 isa<UsingDirectiveDecl>(D) ||
6000 isa<OMPDeclareReductionDecl>(D) ||
6001 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6002 return true;
6003 const auto *VD = dyn_cast<VarDecl>(D);
6004 if (!VD)
6005 return false;
6006 return VD->hasGlobalStorage() || !VD->isUsed();
6007 }))
6008 continue;
6009 }
6010 // Found multiple children - cannot get the one child only.
6011 if (Child)
6012 return nullptr;
6013 Child = S;
6014 }
6015 if (Child)
6016 Child = Child->IgnoreContainers();
6017 }
6018 return Child;
6019}
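// Example (hypothetical input, not from this file): for the target body below,
// getSingleCompoundChild returns the nested teams directive, since the unused
// local declaration and the null statement are both ignorable:
//
//   #pragma omp target
//   {
//     int Unused;  // unused local variable: skipped
//     ;            // null statement: skipped
//     #pragma omp teams
//     { /* ... */ }
//   }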
6020
6021 const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6022 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6023 int32_t &MaxTeamsVal) {
6024
6025 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6026 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6027 "Expected target-based executable directive.");
6028 switch (DirectiveKind) {
6029 case OMPD_target: {
6030 const auto *CS = D.getInnermostCapturedStmt();
6031 const auto *Body =
6032 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6033 const Stmt *ChildStmt =
6034 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6035 if (const auto *NestedDir =
6036 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6037 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6038 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6039 const Expr *NumTeams =
6040 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6041 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6042 if (auto Constant =
6043 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6044 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6045 return NumTeams;
6046 }
6047 MinTeamsVal = MaxTeamsVal = 0;
6048 return nullptr;
6049 }
6050 MinTeamsVal = MaxTeamsVal = 1;
6051 return nullptr;
6052 }
6053 // A value of -1 is used to check if we need to emit no teams region
6054 MinTeamsVal = MaxTeamsVal = -1;
6055 return nullptr;
6056 }
6057 case OMPD_target_teams_loop:
6058 case OMPD_target_teams:
6059 case OMPD_target_teams_distribute:
6060 case OMPD_target_teams_distribute_simd:
6061 case OMPD_target_teams_distribute_parallel_for:
6062 case OMPD_target_teams_distribute_parallel_for_simd: {
6063 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6064 const Expr *NumTeams =
6065 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6066 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6067 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6068 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6069 return NumTeams;
6070 }
6071 MinTeamsVal = MaxTeamsVal = 0;
6072 return nullptr;
6073 }
6074 case OMPD_target_parallel:
6075 case OMPD_target_parallel_for:
6076 case OMPD_target_parallel_for_simd:
6077 case OMPD_target_parallel_loop:
6078 case OMPD_target_simd:
6079 MinTeamsVal = MaxTeamsVal = 1;
6080 return nullptr;
6081 case OMPD_parallel:
6082 case OMPD_for:
6083 case OMPD_parallel_for:
6084 case OMPD_parallel_loop:
6085 case OMPD_parallel_master:
6086 case OMPD_parallel_sections:
6087 case OMPD_for_simd:
6088 case OMPD_parallel_for_simd:
6089 case OMPD_cancel:
6090 case OMPD_cancellation_point:
6091 case OMPD_ordered:
6092 case OMPD_threadprivate:
6093 case OMPD_allocate:
6094 case OMPD_task:
6095 case OMPD_simd:
6096 case OMPD_tile:
6097 case OMPD_unroll:
6098 case OMPD_sections:
6099 case OMPD_section:
6100 case OMPD_single:
6101 case OMPD_master:
6102 case OMPD_critical:
6103 case OMPD_taskyield:
6104 case OMPD_barrier:
6105 case OMPD_taskwait:
6106 case OMPD_taskgroup:
6107 case OMPD_atomic:
6108 case OMPD_flush:
6109 case OMPD_depobj:
6110 case OMPD_scan:
6111 case OMPD_teams:
6112 case OMPD_target_data:
6113 case OMPD_target_exit_data:
6114 case OMPD_target_enter_data:
6115 case OMPD_distribute:
6116 case OMPD_distribute_simd:
6117 case OMPD_distribute_parallel_for:
6118 case OMPD_distribute_parallel_for_simd:
6119 case OMPD_teams_distribute:
6120 case OMPD_teams_distribute_simd:
6121 case OMPD_teams_distribute_parallel_for:
6122 case OMPD_teams_distribute_parallel_for_simd:
6123 case OMPD_target_update:
6124 case OMPD_declare_simd:
6125 case OMPD_declare_variant:
6126 case OMPD_begin_declare_variant:
6127 case OMPD_end_declare_variant:
6128 case OMPD_declare_target:
6129 case OMPD_end_declare_target:
6130 case OMPD_declare_reduction:
6131 case OMPD_declare_mapper:
6132 case OMPD_taskloop:
6133 case OMPD_taskloop_simd:
6134 case OMPD_master_taskloop:
6135 case OMPD_master_taskloop_simd:
6136 case OMPD_parallel_master_taskloop:
6137 case OMPD_parallel_master_taskloop_simd:
6138 case OMPD_requires:
6139 case OMPD_metadirective:
6140 case OMPD_unknown:
6141 break;
6142 default:
6143 break;
6144 }
6145 llvm_unreachable("Unexpected directive kind.");
6146}
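// Examples (hypothetical inputs, not from this file) of the values computed
// above:
//
//   #pragma omp target teams num_teams(8) // MinTeamsVal = MaxTeamsVal = 8
//   #pragma omp target teams num_teams(n) // non-constant: only the expression
//                                         // is returned
//   #pragma omp target teams              // no clause: 0/0, nullptr returned
//   #pragma omp target parallel           // exactly one team: 1/1
//   #pragma omp target                    // no nested directive: -1/-1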
6147
6148 llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6149 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6150 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6151 "Clauses associated with the teams directive expected to be emitted "
6152 "only for the host!");
6153 CGBuilderTy &Bld = CGF.Builder;
6154 int32_t MinNT = -1, MaxNT = -1;
6155 const Expr *NumTeams =
6156 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6157 if (NumTeams != nullptr) {
6158 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6159
6160 switch (DirectiveKind) {
6161 case OMPD_target: {
6162 const auto *CS = D.getInnermostCapturedStmt();
6163 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6164 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6165 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6166 /*IgnoreResultAssign*/ true);
6167 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6168 /*isSigned=*/true);
6169 }
6170 case OMPD_target_teams:
6171 case OMPD_target_teams_distribute:
6172 case OMPD_target_teams_distribute_simd:
6173 case OMPD_target_teams_distribute_parallel_for:
6174 case OMPD_target_teams_distribute_parallel_for_simd: {
6175 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6176 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6177 /*IgnoreResultAssign*/ true);
6178 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6179 /*isSigned=*/true);
6180 }
6181 default:
6182 break;
6183 }
6184 }
6185
6186 assert(MinNT == MaxNT && "Num threads ranges require handling here.");
6187 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6188}
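// Example (hypothetical input, not from this file): for
// "#pragma omp target teams num_teams(n)" the code above emits n as an i32;
// for a bare "#pragma omp target" it takes the constant path and returns
// i32 -1, the sentinel meaning no teams region has to be emitted.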
6189
6190/// Check for a num threads constant value (stored in \p UpperBound), or an
6191/// expression (stored in \p E). If the value is conditional (via an if-clause),
6192/// store the condition in \p CondVal. If \p E and \p CondVal, respectively, are
6193/// nullptr, no expression evaluation is performed.
6194static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6195 const Expr **E, int32_t &UpperBound,
6196 bool UpperBoundOnly, llvm::Value **CondVal) {
6197 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6198 CGF.getContext(), CS->getCapturedStmt());
6199 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6200 if (!Dir)
6201 return;
6202
6203 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6204 // Handle the if clause: if present, the number of threads is
6205 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6206 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6207 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6208 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6209 const OMPIfClause *IfClause = nullptr;
6210 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6211 if (C->getNameModifier() == OMPD_unknown ||
6212 C->getNameModifier() == OMPD_parallel) {
6213 IfClause = C;
6214 break;
6215 }
6216 }
6217 if (IfClause) {
6218 const Expr *CondExpr = IfClause->getCondition();
6219 bool Result;
6220 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6221 if (!Result) {
6222 UpperBound = 1;
6223 return;
6224 }
6225 } else {
6226 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6227 if (const auto *PreInit =
6228 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6229 for (const auto *I : PreInit->decls()) {
6230 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6231 CGF.EmitVarDecl(cast<VarDecl>(*I));
6232 } else {
6233 CodeGenFunction::AutoVarEmission Emission =
6234 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6235 CGF.EmitAutoVarCleanups(Emission);
6236 }
6237 }
6238 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6239 }
6240 }
6241 }
6242 }
6243 // Check the value of the num_threads clause only if the if clause was not
6244 // specified or does not evaluate to false.
6245 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6246 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6247 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6248 const auto *NumThreadsClause =
6249 Dir->getSingleClause<OMPNumThreadsClause>();
6250 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6251 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6252 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6253 UpperBound =
6254 UpperBound
6255 ? Constant->getZExtValue()
6256 : std::min(UpperBound,
6257 static_cast<int32_t>(Constant->getZExtValue()));
6258 // If we haven't found an upper bound, remember we saw a thread limiting
6259 // clause.
6260 if (UpperBound == -1)
6261 UpperBound = 0;
6262 if (!E)
6263 return;
6264 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6265 if (const auto *PreInit =
6266 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6267 for (const auto *I : PreInit->decls()) {
6268 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6269 CGF.EmitVarDecl(cast<VarDecl>(*I));
6270 } else {
6271 CodeGenFunction::AutoVarEmission Emission =
6272 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6273 CGF.EmitAutoVarCleanups(Emission);
6274 }
6275 }
6276 }
6277 *E = NTExpr;
6278 }
6279 return;
6280 }
6281 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6282 UpperBound = 1;
6283}
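// Example (hypothetical input, not from this file) of the interplay handled
// above: for
//
//   #pragma omp target
//   #pragma omp parallel if(c) num_threads(n)
//
// the thread count is <c> ? (<n> ? <n> : 0) : 1; if c folds to false the
// upper bound collapses to 1, and a nested simd-only region likewise forces
// an upper bound of 1.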
6284
6285 const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6286 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6287 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6288 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6289 "Clauses associated with the teams directive expected to be emitted "
6290 "only for the host!");
6291 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6292 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6293 "Expected target-based executable directive.");
6294
6295 const Expr *NT = nullptr;
6296 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6297
6298 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6299 if (E->isIntegerConstantExpr(CGF.getContext())) {
6300 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6301 UpperBound = UpperBound ? Constant->getZExtValue()
6302 : std::min(UpperBound,
6303 int32_t(Constant->getZExtValue()));
6304 }
6305 // If we haven't found an upper bound, remember we saw a thread limiting
6306 // clause.
6307 if (UpperBound == -1)
6308 UpperBound = 0;
6309 if (EPtr)
6310 *EPtr = E;
6311 };
6312
6313 auto ReturnSequential = [&]() {
6314 UpperBound = 1;
6315 return NT;
6316 };
6317
6318 switch (DirectiveKind) {
6319 case OMPD_target: {
6320 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6321 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6322 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6323 CGF.getContext(), CS->getCapturedStmt());
6324 // TODO: The standard is not clear how to resolve two thread limit clauses,
6325 // let's pick the teams one if it's present, otherwise the target one.
6326 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6327 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6328 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6329 ThreadLimitClause = TLC;
6330 if (ThreadLimitExpr) {
6331 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6332 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6333 CodeGenFunction::LexicalScope Scope(
6334 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6335 if (const auto *PreInit =
6336 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6337 for (const auto *I : PreInit->decls()) {
6338 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6339 CGF.EmitVarDecl(cast<VarDecl>(*I));
6340 } else {
6341 CodeGenFunction::AutoVarEmission Emission =
6342 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6343 CGF.EmitAutoVarCleanups(Emission);
6344 }
6345 }
6346 }
6347 }
6348 }
6349 }
6350 if (ThreadLimitClause)
6351 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6352 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6353 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6354 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6355 CS = Dir->getInnermostCapturedStmt();
6356 Child = CGOpenMPRuntime::getSingleCompoundChild(
6357 CGF.getContext(), CS->getCapturedStmt());
6358 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6359 }
6360 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6361 CS = Dir->getInnermostCapturedStmt();
6362 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6363 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6364 return ReturnSequential();
6365 }
6366 return NT;
6367 }
6368 case OMPD_target_teams: {
6369 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6370 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6371 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6372 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6373 }
6374 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6375 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6376 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6377 CGF.getContext(), CS->getCapturedStmt());
6378 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6379 if (Dir->getDirectiveKind() == OMPD_distribute) {
6380 CS = Dir->getInnermostCapturedStmt();
6381 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6382 }
6383 }
6384 return NT;
6385 }
6386 case OMPD_target_teams_distribute:
6387 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6388 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6389 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6390 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6391 }
6392 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6393 UpperBoundOnly, CondVal);
6394 return NT;
6395 case OMPD_target_teams_loop:
6396 case OMPD_target_parallel_loop:
6397 case OMPD_target_parallel:
6398 case OMPD_target_parallel_for:
6399 case OMPD_target_parallel_for_simd:
6400 case OMPD_target_teams_distribute_parallel_for:
6401 case OMPD_target_teams_distribute_parallel_for_simd: {
6402 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6403 const OMPIfClause *IfClause = nullptr;
6404 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6405 if (C->getNameModifier() == OMPD_unknown ||
6406 C->getNameModifier() == OMPD_parallel) {
6407 IfClause = C;
6408 break;
6409 }
6410 }
6411 if (IfClause) {
6412 const Expr *Cond = IfClause->getCondition();
6413 bool Result;
6414 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6415 if (!Result)
6416 return ReturnSequential();
6417 } else {
6418 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6419 *CondVal = CGF.EvaluateExprAsBool(Cond);
6420 }
6421 }
6422 }
6423 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6424 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6425 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6426 CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
6427 }
6428 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6429 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6430 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6431 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6432 return NumThreadsClause->getNumThreads();
6433 }
6434 return NT;
6435 }
6436 case OMPD_target_teams_distribute_simd:
6437 case OMPD_target_simd:
6438 return ReturnSequential();
6439 default:
6440 break;
6441 }
6442 llvm_unreachable("Unsupported directive kind.");
6443}
6444
6445 llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6446 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6447 llvm::Value *NumThreadsVal = nullptr;
6448 llvm::Value *CondVal = nullptr;
6449 llvm::Value *ThreadLimitVal = nullptr;
6450 const Expr *ThreadLimitExpr = nullptr;
6451 int32_t UpperBound = -1;
6452
6453 const Expr *NT = getNumThreadsExprForTargetDirective(
6454 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6455 &ThreadLimitExpr);
6456
6457 // Thread limit expressions are used below, emit them.
6458 if (ThreadLimitExpr) {
6459 ThreadLimitVal =
6460 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6461 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6462 /*isSigned=*/false);
6463 }
6464
6465 // Generate the num threads expression.
6466 if (UpperBound == 1) {
6467 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6468 } else if (NT) {
6469 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6470 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6471 /*isSigned=*/false);
6472 } else if (ThreadLimitVal) {
6473 // If we do not have a num threads value but a thread limit, replace the
6474 // former with the latter; the thread limit expression is already handled.
6475 NumThreadsVal = ThreadLimitVal;
6476 ThreadLimitVal = nullptr;
6477 } else {
6478 // Default to "0" which means runtime choice.
6479 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6480 NumThreadsVal = CGF.Builder.getInt32(0);
6481 }
6482
6483 // Handle the if clause: if present, the number of threads is
6484 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6485 if (CondVal) {
6486 CodeGenFunction::RunCleanupsScope Scope(CGF);
6487 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6488 CGF.Builder.getInt32(1));
6489 }
6490
6491 // If the thread limit and num teams expression were present, take the
6492 // minimum.
6493 if (ThreadLimitVal) {
6494 NumThreadsVal = CGF.Builder.CreateSelect(
6495 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6496 ThreadLimitVal, NumThreadsVal);
6497 }
6498
6499 return NumThreadsVal;
6500}
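// Example (hypothetical input, not from this file): for
//
//   #pragma omp target parallel num_threads(n) thread_limit(m)
//
// the code above emits roughly select(ult(m, n), m, n), i.e. the unsigned
// minimum of the two i32 values, where the value 0 means "let the runtime
// choose".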
6501
6502namespace {
6504
6505// Utility to handle information from clauses associated with a given
6506// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6507// It provides a convenient interface to obtain the information and generate
6508// code for that information.
6509class MappableExprsHandler {
6510public:
6511 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6512 static unsigned getFlagMemberOffset() {
6513 unsigned Offset = 0;
6514 for (uint64_t Remain =
6515 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6516 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6517 !(Remain & 1); Remain = Remain >> 1)
6518 Offset++;
6519 return Offset;
6520 }
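  // Illustrative note (not from this file): OMP_MAP_MEMBER_OF occupies the top
  // 16 bits of the 64-bit flag word (0xffff000000000000), so the loop above
  // counts its 48 trailing zero bits and getFlagMemberOffset() returns 48.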
6521
6522 /// Class that holds debugging information for a data mapping to be passed to
6523 /// the runtime library.
6524 class MappingExprInfo {
6525 /// The variable declaration used for the data mapping.
6526 const ValueDecl *MapDecl = nullptr;
6527 /// The original expression used in the map clause, or null if there is
6528 /// none.
6529 const Expr *MapExpr = nullptr;
6530
6531 public:
6532 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6533 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6534
6535 const ValueDecl *getMapDecl() const { return MapDecl; }
6536 const Expr *getMapExpr() const { return MapExpr; }
6537 };
6538
6539 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6540 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6541 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6542 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6543 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6544 using MapNonContiguousArrayTy =
6545 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6546 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6547 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6548
6549 /// This structure contains combined information generated for mappable
6550 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6551 /// mappers, and non-contiguous information.
6552 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6553 MapExprsArrayTy Exprs;
6554 MapValueDeclsArrayTy Mappers;
6555 MapValueDeclsArrayTy DevicePtrDecls;
6556
6557 /// Append arrays in \a CurInfo.
6558 void append(MapCombinedInfoTy &CurInfo) {
6559 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6560 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6561 CurInfo.DevicePtrDecls.end());
6562 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6563 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6564 }
6565 };
6566
6567 /// Map between a struct and its lowest & highest elements which have been
6568 /// mapped.
6569 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6570 /// HE(FieldIndex, Pointer)}
6571 struct StructRangeInfoTy {
6572 MapCombinedInfoTy PreliminaryMapData;
6573 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6574 0, Address::invalid()};
6575 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6576 0, Address::invalid()};
6577 Address Base = Address::invalid();
6578 Address LB = Address::invalid();
6579 bool IsArraySection = false;
6580 bool HasCompleteRecord = false;
6581 };
6582
6583private:
6584 /// Information gathered for a single entry of a map-like clause.
6585 struct MapInfo {
6586 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6587 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6588 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6589 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6590 bool ReturnDevicePointer = false;
6591 bool IsImplicit = false;
6592 const ValueDecl *Mapper = nullptr;
6593 const Expr *VarRef = nullptr;
6594 bool ForDeviceAddr = false;
6595
6596 MapInfo() = default;
6597 MapInfo(
6598 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6599 OpenMPMapClauseKind MapType,
6600 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6601 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6602 bool ReturnDevicePointer, bool IsImplicit,
6603 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6604 bool ForDeviceAddr = false)
6605 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6606 MotionModifiers(MotionModifiers),
6607 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6608 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6609 };
6610
6611 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6612 /// member and there is no map information about it, then emission of that
6613 /// entry is deferred until the whole struct has been processed.
6614 struct DeferredDevicePtrEntryTy {
6615 const Expr *IE = nullptr;
6616 const ValueDecl *VD = nullptr;
6617 bool ForDeviceAddr = false;
6618
6619 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6620 bool ForDeviceAddr)
6621 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6622 };
6623
6624 /// The target directive from which the mappable clauses were extracted. It
6625 /// is either an executable directive or a user-defined mapper directive.
6626 llvm::PointerUnion<const OMPExecutableDirective *,
6627 const OMPDeclareMapperDecl *>
6628 CurDir;
6629
6630 /// Function the directive is being generated for.
6631 CodeGenFunction &CGF;
6632
6633 /// Set of all first private variables in the current directive.
6634 /// bool data is set to true if the variable is implicitly marked as
6635 /// firstprivate, false otherwise.
6636 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6637
6638 /// Map between device pointer declarations and their expression components.
6639 /// The key value for declarations in 'this' is null.
6640 llvm::DenseMap<
6641 const ValueDecl *,
6642 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6643 DevPointersMap;
6644
6645 /// Map between device addr declarations and their expression components.
6646 /// The key value for declarations in 'this' is null.
6647 llvm::DenseMap<
6648 const ValueDecl *,
6649 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6650 HasDevAddrsMap;
6651
6652 /// Map between lambda declarations and their map type.
6653 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6654
6655 llvm::Value *getExprTypeSize(const Expr *E) const {
6656 QualType ExprTy = E->getType().getCanonicalType();
6657
6658 // Calculate the size for array shaping expression.
6659 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6660 llvm::Value *Size =
6661 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6662 for (const Expr *SE : OAE->getDimensions()) {
6663 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6664 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6665 CGF.getContext().getSizeType(),
6666 SE->getExprLoc());
6667 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6668 }
6669 return Size;
6670 }
6671
6672 // Reference types are ignored for mapping purposes.
6673 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6674 ExprTy = RefTy->getPointeeType().getCanonicalType();
6675
6676 // Given that an array section is considered a built-in type, we need to
6677 // do the calculation based on the length of the section instead of relying
6678 // on CGF.getTypeSize(E->getType()).
6679 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
6680 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
6681 OAE->getBase()->IgnoreParenImpCasts())
6682 .getCanonicalType();
6683
6684 // If there is no length associated with the expression and the lower bound
6685 // is not specified either, that means we are using the whole length of the
6686 // base.
6687 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6688 !OAE->getLowerBound())
6689 return CGF.getTypeSize(BaseTy);
6690
6691 llvm::Value *ElemSize;
6692 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6693 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6694 } else {
6695 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6696 assert(ATy && "Expecting array type if not a pointer type.");
6697 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6698 }
6699
6700 // If we don't have a length at this point, that is because we have an
6701 // array section with a single element.
6702 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6703 return ElemSize;
6704
6705 if (const Expr *LenExpr = OAE->getLength()) {
6706 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6707 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6708 CGF.getContext().getSizeType(),
6709 LenExpr->getExprLoc());
6710 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6711 }
6712 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6713 OAE->getLowerBound() && "expected array_section[lb:].");
6714 // Size = sizetype - lb * elemtype;
6715 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6716 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6717 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6718 CGF.getContext().getSizeType(),
6719 OAE->getLowerBound()->getExprLoc());
6720 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6721 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6722 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6723 LengthVal = CGF.Builder.CreateSelect(
6724 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6725 return LengthVal;
6726 }
6727 return CGF.getTypeSize(ExprTy);
6728 }
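  // Examples (hypothetical inputs, not from this file) of the sizes computed
  // above, given double *p and double a[10]:
  //
  //   map(p[0:n]) -> n * sizeof(double)
  //   map(a[2:])  -> sizeof(a) - 2 * sizeof(double), clamped at zero
  //   map(a)      -> sizeof(a), i.e. 10 * sizeof(double)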
6729
6730 /// Return the corresponding bits for a given map clause modifier. Add
6731 /// a flag marking the map as a pointer if requested. Add a flag marking the
6732 /// map as the first one of a series of maps that relate to the same map
6733 /// expression.
6734 OpenMPOffloadMappingFlags getMapTypeBits(
6735 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6736 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6737 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6738 OpenMPOffloadMappingFlags Bits =
6739 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6740 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6741 switch (MapType) {
6742 case OMPC_MAP_alloc:
6743 case OMPC_MAP_release:
6744 // alloc and release are the default behavior in the runtime library, i.e.
6745 // if we don't pass any bits, alloc/release is what the runtime is going
6746 // to do. Therefore, we don't need to signal anything for these two type
6747 // modifiers.
6748 break;
6749 case OMPC_MAP_to:
6750 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6751 break;
6752 case OMPC_MAP_from:
6753 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6754 break;
6755 case OMPC_MAP_tofrom:
6756 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6757 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6758 break;
6759 case OMPC_MAP_delete:
6760 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6761 break;
6762 case OMPC_MAP_unknown:
6763 llvm_unreachable("Unexpected map type!");
6764 }
6765 if (AddPtrFlag)
6766 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6767 if (AddIsTargetParamFlag)
6768 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6769 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6770 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6771 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6772 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6773 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6774 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6775 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6776 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6777 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6778 if (IsNonContiguous)
6779 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6780 return Bits;
6781 }
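  // Example (hypothetical input, not from this file): map(always, close,
  // tofrom: x) yields OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
  // OMP_MAP_CLOSE, and the first entry generated for the capture additionally
  // carries OMP_MAP_TARGET_PARAM.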
6782
6783 /// Return true if the provided expression is a final array section. A
6784 /// final array section is one whose length can't be proved to be one.
6785 bool isFinalArraySectionExpression(const Expr *E) const {
6786 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
6787
6788 // It is not an array section and therefore not a unity-size one.
6789 if (!OASE)
6790 return false;
6791
6792 // An array section with no colon always refers to a single element.
6793 if (OASE->getColonLocFirst().isInvalid())
6794 return false;
6795
6796 const Expr *Length = OASE->getLength();
6797
6798 // If we don't have a length we have to check if the array has size 1
6799 // for this dimension. Also, we should always expect a length if the
6800 // base type is pointer.
6801 if (!Length) {
6802 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
6803 OASE->getBase()->IgnoreParenImpCasts())
6804 .getCanonicalType();
6805 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6806 return ATy->getSExtSize() != 1;
6807 // If we don't have a constant dimension length, we have to consider
6808 // the current section as having any size, so it is not necessarily
6809 // unitary. If it happens to be unity size, that's the user's fault.
6810 return true;
6811 }
6812
6813 // Check if the length evaluates to 1.
6814 Expr::EvalResult Result;
6815 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6816 return true; // Can have more than size 1.
6817
6818 llvm::APSInt ConstLength = Result.Val.getInt();
6819 return ConstLength.getSExtValue() != 1;
6820 }
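  // Examples (hypothetical inputs, not from this file), given int a[4][5] and
  // a runtime value n:
  //
  //   a[3][4:1] -> not final: the length folds to 1
  //   a[3][0:n] -> final: the length cannot be proved to be 1
  //   a[0:]     -> final: no length, and the dimension size is 4, not 1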
6821
6822 /// Generate the base pointers, section pointers, sizes, map type bits, and
6823 /// user-defined mappers (all included in \a CombinedInfo) for the provided
6824 /// map type, map or motion modifiers, and expression components.
6825 /// \a IsFirstComponent should be set to true if the provided set of
6826 /// components is the first associated with a capture.
6827 void generateInfoForComponentList(
6828 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6829 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6830 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6831 MapCombinedInfoTy &CombinedInfo,
6832 MapCombinedInfoTy &StructBaseCombinedInfo,
6833 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6834 bool IsImplicit, bool GenerateAllInfoForClauses,
6835 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6836 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6837 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6838 OverlappedElements = std::nullopt,
6839 bool AreBothBasePtrAndPteeMapped = false) const {
6840 // The following summarizes what has to be generated for each map and the
6841 // types below. The generated information is expressed in this order:
6842 // base pointer, section pointer, size, flags
6843 // (to add to the ones that come from the map type and modifier).
6844 //
6845 // double d;
6846 // int i[100];
6847 // float *p;
6848 // int **a = &i;
6849 //
6850 // struct S1 {
6851 // int i;
6852 // float f[50];
6853 // }
6854 // struct S2 {
6855 // int i;
6856 // float f[50];
6857 // S1 s;
6858 // double *p;
6859 // struct S2 *ps;
6860 // int &ref;
6861 // }
6862 // S2 s;
6863 // S2 *ps;
6864 //
6865 // map(d)
6866 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6867 //
6868 // map(i)
6869 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6870 //
6871 // map(i[1:23])
6872 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6873 //
6874 // map(p)
6875 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6876 //
6877 // map(p[1:24])
6878 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6879 // in unified shared memory mode or for local pointers
6880 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6881 //
6882 // map((*a)[0:3])
6883 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6884 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6885 //
6886 // map(**a)
6887 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6888 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6889 //
6890 // map(s)
6891 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6892 //
6893 // map(s.i)
6894 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6895 //
6896 // map(s.s.f)
6897 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6898 //
6899 // map(s.p)
6900 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6901 //
6902 // map(to: s.p[:22])
6903 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6904 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6905 // &(s.p), &(s.p[0]), 22*sizeof(double),
6906 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6907 // (*) alloc space for struct members, only this is a target parameter
6908 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6909 // optimizes this entry out, same in the examples below)
6910 // (***) map the pointee (map: to)
6911 //
6912 // map(to: s.ref)
6913 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6914 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6915 // (*) alloc space for struct members, only this is a target parameter
6916 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6917 // optimizes this entry out, same in the examples below)
6918 // (***) map the pointee (map: to)
6919 //
6920 // map(s.ps)
6921 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6922 //
6923 // map(from: s.ps->s.i)
6924 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6925 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6926 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6927 //
6928 // map(to: s.ps->ps)
6929 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6930 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6931 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
6932 //
6933 // map(s.ps->ps->ps)
6934 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6935 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6936 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6937 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6938 //
6939 // map(to: s.ps->ps->s.f[:22])
6940 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6941 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6942 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6943 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6944 //
6945 // map(ps)
6946 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6947 //
6948 // map(ps->i)
6949 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6950 //
6951 // map(ps->s.f)
6952 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6953 //
6954 // map(from: ps->p)
6955 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6956 //
6957 // map(to: ps->p[:22])
6958 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6959 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6960 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6961 //
6962 // map(ps->ps)
6963 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6964 //
6965 // map(from: ps->ps->s.i)
6966 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6967 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6968 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6969 //
6970 // map(from: ps->ps->ps)
6971 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6972 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6973 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6974 //
6975 // map(ps->ps->ps->ps)
6976 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6977 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6978 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6979 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6980 //
6981 // map(to: ps->ps->ps->s.f[:22])
6982 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6983 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6984 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6985 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6986 //
6987 // map(to: s.f[:22]) map(from: s.p[:33])
6988 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6989 // sizeof(double*) (**), TARGET_PARAM
6990 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6991 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6992 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6993 // (*) allocate contiguous space needed to fit all mapped members even if
6994 // we allocate space for members not mapped (in this example,
6995 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
6996 // them as well because they fall between &s.f[0] and &s.p)
6997 //
6998 // map(from: s.f[:22]) map(to: ps->p[:33])
6999 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7000 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7001 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7002 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7003 // (*) the struct this entry pertains to is the 2nd element in the list of
7004 // arguments, hence MEMBER_OF(2)
7005 //
7006 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7007 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7008 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7009 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7010 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7011 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7012 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7013 // (*) the struct this entry pertains to is the 4th element in the list
7014 // of arguments, hence MEMBER_OF(4)
7015 //
7016 // map(p, p[:100])
7017 // ===> map(p[:100])
7018 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7019
7020 // Track if the map information being generated is the first for a capture.
7021 bool IsCaptureFirstInfo = IsFirstComponentList;
7022 // When the variable is on a declare target link or in a to clause with
7023 // unified memory, a reference is needed to hold the host/device address
7024 // of the variable.
7025 bool RequiresReference = false;
7026
7027 // Scan the components from the base to the complete expression.
7028 auto CI = Components.rbegin();
7029 auto CE = Components.rend();
7030 auto I = CI;
7031
7032 // Track if the map information being generated is the first for a list of
7033 // components.
7034 bool IsExpressionFirstInfo = true;
7035 bool FirstPointerInComplexData = false;
7036 Address BP = Address::invalid();
7037 const Expr *AssocExpr = I->getAssociatedExpression();
7038 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7039 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7040 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7041
7042 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7043 return;
7044 if (isa<MemberExpr>(AssocExpr)) {
7045 // The base is the 'this' pointer. The content of the pointer is going
7046 // to be the base of the field being mapped.
7047 BP = CGF.LoadCXXThisAddress();
7048 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7049 (OASE &&
7050 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7051 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7052 } else if (OAShE &&
7053 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7054 BP = Address(
7055 CGF.EmitScalarExpr(OAShE->getBase()),
7056 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7057 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7058 } else {
7059 // The base is the reference to the variable.
7060 // BP = &Var.
7061 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7062 if (const auto *VD =
7063 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7064 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7065 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7066 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7067 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7068 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7069 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7070 RequiresReference = true;
7071 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7072 }
7073 }
7074 }
7075
7076 // If the variable is a pointer and is being dereferenced (i.e. is not
7077 // the last component), the base has to be the pointer itself, not its
7078 // reference. References are ignored for mapping purposes.
7079 QualType Ty =
7080 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7081 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7082 // No need to generate individual map information for the pointer, it
7083 // can be associated with the combined storage if shared memory mode is
7084 // active or the base declaration is not global variable.
7085 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7086 if (!AreBothBasePtrAndPteeMapped &&
7087 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7088 !VD || VD->hasLocalStorage()))
7089 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7090 else
7091 FirstPointerInComplexData = true;
7092 ++I;
7093 }
7094 }
7095
7096 // Track whether a component of the list should be marked as MEMBER_OF some
7097 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7098 // in a component list should be marked as MEMBER_OF, all subsequent entries
7099 // do not belong to the base struct. E.g.
7100 // struct S2 s;
7101 // s.ps->ps->ps->f[:]
7102 // (1) (2) (3) (4)
7103 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7104 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7105 // is the pointee of ps(2) which is not member of struct s, so it should not
7106 // be marked as such (it is still PTR_AND_OBJ).
7107 // The variable is initialized to false so that PTR_AND_OBJ entries which
7108 // are not struct members are not considered (e.g. array of pointers to
7109 // data).
7110 bool ShouldBeMemberOf = false;
7111
7112 // Variable keeping track of whether or not we have encountered a component
7113 // in the component list which is a member expression. Useful when we have a
7114 // pointer or a final array section, in which case it is the previous
7115 // component in the list which tells us whether we have a member expression.
7116 // E.g. X.f[:]
7117 // While processing the final array section "[:]" it is "f" which tells us
7118 // whether we are dealing with a member of a declared struct.
7119 const MemberExpr *EncounteredME = nullptr;
7120
7121 // Track the total number of dimensions. Start from one for the dummy
7122 // dimension.
7123 uint64_t DimSize = 1;
7124
7125 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7126 bool IsPrevMemberReference = false;
7127
7128 // We need to check if we will be encountering any member expressions (MEs).
7129 // If we do not encounter any ME it means we will be mapping the whole struct.
7130 // In that case we need to skip adding an entry for the struct to the
7131 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7132 // list only when generating all info for clauses.
7133 bool IsMappingWholeStruct = true;
7134 if (!GenerateAllInfoForClauses) {
7135 IsMappingWholeStruct = false;
7136 } else {
7137 for (auto TempI = I; TempI != CE; ++TempI) {
7138 const MemberExpr *PossibleME =
7139 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7140 if (PossibleME) {
7141 IsMappingWholeStruct = false;
7142 break;
7143 }
7144 }
7145 }
7146
7147 for (; I != CE; ++I) {
7148 // If the current component is member of a struct (parent struct) mark it.
7149 if (!EncounteredME) {
7150 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7151 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7152 // as MEMBER_OF the parent struct.
7153 if (EncounteredME) {
7154 ShouldBeMemberOf = true;
7155 // Do not emit as complex pointer if this is actually not array-like
7156 // expression.
7157 if (FirstPointerInComplexData) {
7158 QualType Ty = std::prev(I)
7159 ->getAssociatedDeclaration()
7160 ->getType()
7161 .getNonReferenceType();
7162 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7163 FirstPointerInComplexData = false;
7164 }
7165 }
7166 }
7167
7168 auto Next = std::next(I);
7169
7170 // We need to generate the addresses and sizes if this is the last
7171 // component, if the component is a pointer or if it is an array section
7172 // whose length can't be proved to be one. If this is a pointer, it
7173 // becomes the base address for the following components.
7174
7175 // A final array section, is one whose length can't be proved to be one.
7176 // If the map item is non-contiguous then we don't treat any array section
7177 // as final array section.
7178 bool IsFinalArraySection =
7179 !IsNonContiguous &&
7180 isFinalArraySectionExpression(I->getAssociatedExpression());
7181
7182 // If we have a declaration for the mapping use that, otherwise use
7183 // the base declaration of the map clause.
7184 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7185 ? I->getAssociatedDeclaration()
7186 : BaseDecl;
7187 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7188 : MapExpr;
7189
7190 // Get information on whether the element is a pointer. Have to do a
7191 // special treatment for array sections given that they are built-in
7192 // types.
7193 const auto *OASE =
7194 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7195 const auto *OAShE =
7196 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7197 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7198 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7199 bool IsPointer =
7200 OAShE ||
7201 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7202 .getCanonicalType()
7203 ->isAnyPointerType()) ||
7204 I->getAssociatedExpression()->getType()->isAnyPointerType();
7205 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7206 MapDecl &&
7207 MapDecl->getType()->isLValueReferenceType();
7208 bool IsNonDerefPointer = IsPointer &&
7209 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7210 !IsNonContiguous;
7211
7212 if (OASE)
7213 ++DimSize;
7214
7215 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7216 IsFinalArraySection) {
7217 // If this is not the last component, we expect the pointer to be
7218 // associated with an array expression or member expression.
7219 assert((Next == CE ||
7220 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7221 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7222 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7223 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7224 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7225 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7226 "Unexpected expression");
7227
7228 Address LB = Address::invalid();
7229 Address LowestElem = Address::invalid();
7230 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7231 const MemberExpr *E) {
7232 const Expr *BaseExpr = E->getBase();
7233 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7234 // scalar.
7235 LValue BaseLV;
7236 if (E->isArrow()) {
7237 LValueBaseInfo BaseInfo;
7238 TBAAAccessInfo TBAAInfo;
7239 Address Addr =
7240 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7241 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7242 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7243 } else {
7244 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7245 }
7246 return BaseLV;
7247 };
7248 if (OAShE) {
7249 LowestElem = LB =
7250 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7251 CGF.ConvertTypeForMem(
7252 OAShE->getBase()->getType()->getPointeeType()),
7253 CGF.getContext().getTypeAlignInChars(
7254 OAShE->getBase()->getType()));
7255 } else if (IsMemberReference) {
7256 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7257 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7258 LowestElem = CGF.EmitLValueForFieldInitialization(
7259 BaseLVal, cast<FieldDecl>(MapDecl))
7260 .getAddress();
7261 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7262 .getAddress();
7263 } else {
7264 LowestElem = LB =
7265 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7266 .getAddress();
7267 }
7268
7269 // If this component is a pointer inside the base struct then we don't
7270 // need to create any entry for it - it will be combined with the object
7271 // it is pointing to into a single PTR_AND_OBJ entry.
7272 bool IsMemberPointerOrAddr =
7273 EncounteredME &&
7274 (((IsPointer || ForDeviceAddr) &&
7275 I->getAssociatedExpression() == EncounteredME) ||
7276 (IsPrevMemberReference && !IsPointer) ||
7277 (IsMemberReference && Next != CE &&
7278 !Next->getAssociatedExpression()->getType()->isPointerType()));
7279 if (!OverlappedElements.empty() && Next == CE) {
7280 // Handle base element with the info for overlapped elements.
7281 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7282 assert(!IsPointer &&
7283 "Unexpected base element with the pointer type.");
7284 // Mark the whole struct as the struct that requires allocation on the
7285 // device.
7286 PartialStruct.LowestElem = {0, LowestElem};
7287 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7288 I->getAssociatedExpression()->getType());
7289 Address HB = CGF.Builder.CreateConstGEP(
7290 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7291 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7292 TypeSize.getQuantity() - 1);
7293 PartialStruct.HighestElem = {
7294 std::numeric_limits<decltype(
7295 PartialStruct.HighestElem.first)>::max(),
7296 HB};
7297 PartialStruct.Base = BP;
7298 PartialStruct.LB = LB;
7299 assert(
7300 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7301 "Overlapped elements must be used only once for the variable.");
7302 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7303 // Emit data for non-overlapped data.
7304 OpenMPOffloadMappingFlags Flags =
7305 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7306 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7307 /*AddPtrFlag=*/false,
7308 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7309 llvm::Value *Size = nullptr;
7310 // Do bitcopy of all non-overlapped structure elements.
7311 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7312 Component : OverlappedElements) {
7313 Address ComponentLB = Address::invalid();
7314 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7315 Component) {
7316 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7317 const auto *FD = dyn_cast<FieldDecl>(VD);
7318 if (FD && FD->getType()->isLValueReferenceType()) {
7319 const auto *ME =
7320 cast<MemberExpr>(MC.getAssociatedExpression());
7321 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7322 ComponentLB =
7323 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7324 .getAddress();
7325 } else {
7326 ComponentLB =
7327 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7328 .getAddress();
7329 }
7330 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7331 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7332 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7333 LBPtr);
7334 break;
7335 }
7336 }
7337 assert(Size && "Failed to determine structure size");
7338 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7339 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7340 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7341 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7342 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7343 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7344 Size, CGF.Int64Ty, /*isSigned=*/true));
7345 CombinedInfo.Types.push_back(Flags);
7346 CombinedInfo.Mappers.push_back(nullptr);
7347 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7348 : 1);
7349 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7350 }
7351 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7352 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7353 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7354 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7355 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7356 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7357 Size = CGF.Builder.CreatePtrDiff(
7358 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7359 LBPtr);
7360 CombinedInfo.Sizes.push_back(
7361 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7362 CombinedInfo.Types.push_back(Flags);
7363 CombinedInfo.Mappers.push_back(nullptr);
7364 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7365 : 1);
7366 break;
7367 }
7368 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7369 // Skip adding an entry in the CurInfo of this combined entry if the
7370 // whole struct is currently being mapped. The struct needs to be added
7371 // in the first position before any data internal to the struct is being
7372 // mapped.
7373 if (!IsMemberPointerOrAddr ||
7374 (Next == CE && MapType != OMPC_MAP_unknown)) {
7375 if (!IsMappingWholeStruct) {
7376 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7377 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7378 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7379 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7380 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7381 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7382 Size, CGF.Int64Ty, /*isSigned=*/true));
7383 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7384 : 1);
7385 } else {
7386 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7387 StructBaseCombinedInfo.BasePointers.push_back(
7388 BP.emitRawPointer(CGF));
7389 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7390 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7391 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7392 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7393 Size, CGF.Int64Ty, /*isSigned=*/true));
7394 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7395 IsNonContiguous ? DimSize : 1);
7396 }
7397
7398 // If Mapper is valid, the last component inherits the mapper.
7399 bool HasMapper = Mapper && Next == CE;
7400 if (!IsMappingWholeStruct)
7401 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7402 else
7403 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7404 : nullptr);
7405
7406 // We need to add a pointer flag for each map that comes from the
7407 // same expression except for the first one. We also need to signal
7408 // this map is the first one that relates with the current capture
7409 // (there is a set of entries for each capture).
7410 OpenMPOffloadMappingFlags Flags =
7411 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7412 !IsExpressionFirstInfo || RequiresReference ||
7413 FirstPointerInComplexData || IsMemberReference,
7414 AreBothBasePtrAndPteeMapped ||
7415 (IsCaptureFirstInfo && !RequiresReference),
7416 IsNonContiguous);
7417
7418 if (!IsExpressionFirstInfo || IsMemberReference) {
7419 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7420 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7421 if (IsPointer || (IsMemberReference && Next != CE))
7422 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7423 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7424 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7425 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7426 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7427
7428 if (ShouldBeMemberOf) {
7429 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7430 // should be later updated with the correct value of MEMBER_OF.
7431 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7432 // From now on, all subsequent PTR_AND_OBJ entries should not be
7433 // marked as MEMBER_OF.
7434 ShouldBeMemberOf = false;
7435 }
7436 }
7437
7438 if (!IsMappingWholeStruct)
7439 CombinedInfo.Types.push_back(Flags);
7440 else
7441 StructBaseCombinedInfo.Types.push_back(Flags);
7442 }
7443
7444 // If we have encountered a member expression so far, keep track of the
7445 // mapped member. If the parent is "*this", then the value declaration
7446 // is nullptr.
7447 if (EncounteredME) {
7448 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7449 unsigned FieldIndex = FD->getFieldIndex();
7450
7451 // Update info about the lowest and highest elements for this struct
7452 if (!PartialStruct.Base.isValid()) {
7453 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7454 if (IsFinalArraySection) {
7455 Address HB =
7456 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7457 .getAddress();
7458 PartialStruct.HighestElem = {FieldIndex, HB};
7459 } else {
7460 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7461 }
7462 PartialStruct.Base = BP;
7463 PartialStruct.LB = BP;
7464 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7465 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7466 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7467 if (IsFinalArraySection) {
7468 Address HB =
7469 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7470 .getAddress();
7471 PartialStruct.HighestElem = {FieldIndex, HB};
7472 } else {
7473 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7474 }
7475 }
7476 }
7477
7478 // Need to emit combined struct for array sections.
7479 if (IsFinalArraySection || IsNonContiguous)
7480 PartialStruct.IsArraySection = true;
7481
7482 // If we have a final array section, we are done with this expression.
7483 if (IsFinalArraySection)
7484 break;
7485
7486 // The pointer becomes the base for the next element.
7487 if (Next != CE)
7488 BP = IsMemberReference ? LowestElem : LB;
7489
7490 IsExpressionFirstInfo = false;
7491 IsCaptureFirstInfo = false;
7492 FirstPointerInComplexData = false;
7493 IsPrevMemberReference = IsMemberReference;
7494 } else if (FirstPointerInComplexData) {
7495 QualType Ty = Components.rbegin()
7496 ->getAssociatedDeclaration()
7497 ->getType()
7498 .getNonReferenceType();
7499 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7500 FirstPointerInComplexData = false;
7501 }
7502 }
7503 // If we ran into the whole component, allocate space for the whole
7504 // record.
7505 if (!EncounteredME)
7506 PartialStruct.HasCompleteRecord = true;
7507
7508 if (!IsNonContiguous)
7509 return;
7510
7511 const ASTContext &Context = CGF.getContext();
7512
7513 // To support strides in array sections, initialize the first dimension
7514 // size as 1, the first offset as 0, and the first count as 1.
7515 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7516 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7517 MapValuesArrayTy CurStrides;
7518 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7519 uint64_t ElementTypeSize;
7520
7521 // Collect Size information for each dimension and get the element size as
7522 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7523 // should be [10, 10] and the first stride is 4 bytes.
7524 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7525 Components) {
7526 const Expr *AssocExpr = Component.getAssociatedExpression();
7527 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7528
7529 if (!OASE)
7530 continue;
7531
7532 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7533 auto *CAT = Context.getAsConstantArrayType(Ty);
7534 auto *VAT = Context.getAsVariableArrayType(Ty);
7535
7536 // We need all the dimension sizes except for the last one.
7537 assert((VAT || CAT || &Component == &*Components.begin()) &&
7538 "Should be either ConstantArray or VariableArray if not the "
7539 "first Component");
7540
7541 // Get element size if CurStrides is empty.
7542 if (CurStrides.empty()) {
7543 const Type *ElementType = nullptr;
7544 if (CAT)
7545 ElementType = CAT->getElementType().getTypePtr();
7546 else if (VAT)
7547 ElementType = VAT->getElementType().getTypePtr();
7548 else
7549 assert(&Component == &*Components.begin() &&
7550 "Only expect pointer (non CAT or VAT) when this is the "
7551 "first Component");
7552 // If ElementType is null, then it means the base is a pointer
7553 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7554 // on the next iteration.
7555 if (ElementType) {
7556 // If the base is a pointer, we need to remove one level of
7557 // indirection.
7558 if (&Component != &*Components.begin())
7559 ElementType = ElementType->getPointeeOrArrayElementType();
7560 ElementTypeSize =
7561 Context.getTypeSizeInChars(ElementType).getQuantity();
7562 CurStrides.push_back(
7563 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7564 }
7565 }
7566 // Get each dimension's size except for the last one, since it is not
7567 // needed.
7568 if (DimSizes.size() < Components.size() - 1) {
7569 if (CAT)
7570 DimSizes.push_back(
7571 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7572 else if (VAT)
7573 DimSizes.push_back(CGF.Builder.CreateIntCast(
7574 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7575 /*IsSigned=*/false));
7576 }
7577 }
7578
7579 // Skip the dummy dimension since we already have its information.
7580 auto *DI = DimSizes.begin() + 1;
7581 // Running product of dimension sizes.
7582 llvm::Value *DimProd =
7583 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7584
7585 // Collect info for non-contiguous maps. Note that offsets, counts, and
7586 // strides are only meaningful for array subscripts and array sections,
7587 // so other components are skipped.
7588 // Also, the offsets, counts, and strides lists are not the same length
7589 // as pointers, base_pointers, sizes, or dims. Instead, their length
7590 // equals the number of non-contiguous declarations in the 'target
7591 // update' to/from clause.
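// For example (illustrative, not from the source): 'target update
// to(arr[0:2:2][1:3])' is a single non-contiguous list item, so exactly
// one offsets/counts/strides triple is appended at the end of this
// function, with one entry per mapped dimension.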
7592 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7593 Components) {
7594 const Expr *AssocExpr = Component.getAssociatedExpression();
7595
7596 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
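// A plain subscript selects a single element: the offset is the index
// value, the count is 1, and the previous dimension's stride is reused.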
7597 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7598 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7599 /*isSigned=*/false);
7600 CurOffsets.push_back(Offset);
7601 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7602 CurStrides.push_back(CurStrides.back());
7603 continue;
7604 }
7605
7606 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7607
7608 if (!OASE)
7609 continue;
7610
7611 // Offset
7612 const Expr *OffsetExpr = OASE->getLowerBound();
7613 llvm::Value *Offset = nullptr;
7614 if (!OffsetExpr) {
7615 // If offset is absent, then we just set it to zero.
7616 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7617 } else {
7618 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7619 CGF.Int64Ty,
7620 /*isSigned=*/false);
7621 }
7622 CurOffsets.push_back(Offset);
7623
7624 // Count
7625 const Expr *CountExpr = OASE->getLength();
7626 llvm::Value *Count = nullptr;
7627 if (!CountExpr) {
7628 // In Clang, once a high dimension is an array section, we construct all
7629 // the lower dimension as array section, however, for case like
7630 // arr[0:2][2], Clang construct the inner dimension as an array section
7631 // but it actually is not in an array section form according to spec.
7632 if (!OASE->getColonLocFirst().isValid() &&
7633 !OASE->getColonLocSecond().isValid()) {
7634 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7635 } else {
7636 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7637 // When the length is absent it defaults to ⌈(size −
7638 // lower-bound)/stride⌉, where size is the size of the array
7639 // dimension.
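// For example (illustrative, not from the source): for 'int a[10]'
// mapped as 'a[2::2]', size = 10, lower-bound = 2, and stride = 2, so
// the default length is (10 - 2) / 2 = 4 elements.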
7640 const Expr *StrideExpr = OASE->getStride();
7641 llvm::Value *Stride =
7642 StrideExpr
7643 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7644 CGF.Int64Ty, /*isSigned=*/false)
7645 : nullptr;
7646 if (Stride)
7647 Count = CGF.Builder.CreateUDiv(
7648 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7649 else
7650 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7651 }
7652 } else {
7653 Count = CGF.EmitScalarExpr(CountExpr);
7654 }
7655 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7656 CurCounts.push_back(Count);
7657
7658 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7659 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7660 // Offset Count Stride
7661 // D0 0 1 4 (int) <- dummy dimension
7662 // D1 0 2 8 (2 * (1) * 4)
7663 // D2 1 2 20 (1 * (1 * 5) * 4)
7664 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
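// E.g. for row D2 above: DimProd = 4 bytes * (1 * 5) = 20, and the user
// stride of 1 leaves the final stride at 20.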
7665 const Expr *StrideExpr = OASE->getStride();
7666 llvm::Value *Stride =
7667 StrideExpr
7668 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7669 CGF.Int64Ty, /*isSigned=*/false)
7670 : nullptr;
7671 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7672 if (Stride)
7673 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7674 else
7675 CurStrides.push_back(DimProd);
7676 if (DI != DimSizes.end())
7677 ++DI;
7678 }
7679
7680 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7681 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7682 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7683 }
7684
7685 /// Return the adjusted map modifiers if the declaration a capture refers to
7686 /// appears in a first-private clause. This is expected to be used only with
7687 /// directives that start with 'target'.
7688 OpenMPOffloadMappingFlags
7689 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7690 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7691
7692 // A first private variable captured by reference will use only the
7693 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7694 // declaration is known as first-private in this handler.
7695 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7696 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7697 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7698 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7699 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7700 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7701 }
7702 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7703 if (I != LambdasMap.end())
7704 // for map(to: lambda): using user specified map type.
7705 return getMapTypeBits(
7706 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7707 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7708 /*AddPtrFlag=*/false,
7709 /*AddIsTargetParamFlag=*/false,
7710 /*isNonContiguous=*/false);
7711 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7712 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7713 }
7714
7715 void getPlainLayout(const CXXRecordDecl *RD,
7716 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7717 bool AsBase) const {
7718 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7719
7720 llvm::StructType *St =
7721 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7722
7723 unsigned NumElements = St->getNumElements();
7724 llvm::SmallVector<
7725 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7726 RecordLayout(NumElements);
7727
7728 // Fill bases.
7729 for (const auto &I : RD->bases()) {
7730 if (I.isVirtual())
7731 continue;
7732
7733 QualType BaseTy = I.getType();
7734 const auto *Base = BaseTy->getAsCXXRecordDecl();
7735 // Ignore empty bases.
7736 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
7737 CGF.getContext()
7738 .getASTRecordLayout(RD)
7739 .getBaseClassOffset(Base)
7740 .isZero())
7741 continue;
7742
7743 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7744 RecordLayout[FieldIndex] = Base;
7745 }
7746 // Fill in virtual bases.
7747 for (const auto &I : RD->vbases()) {
7748 QualType BaseTy = I.getType();
7749 // Ignore empty bases.
7750 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
7751 continue;
7752
7753 const auto *Base = BaseTy->getAsCXXRecordDecl();
7754 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7755 if (RecordLayout[FieldIndex])
7756 continue;
7757 RecordLayout[FieldIndex] = Base;
7758 }
7759 // Fill in all the fields.
7760 assert(!RD->isUnion() && "Unexpected union.");
7761 for (const auto *Field : RD->fields()) {
7762 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7763 // will fill in later.)
7764 if (!Field->isBitField() &&
7765 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
7766 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7767 RecordLayout[FieldIndex] = Field;
7768 }
7769 }
7770 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7771 &Data : RecordLayout) {
7772 if (Data.isNull())
7773 continue;
7774 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7775 getPlainLayout(Base, Layout, /*AsBase=*/true);
7776 else
7777 Layout.push_back(Data.get<const FieldDecl *>());
7778 }
7779 }
7780
7781 /// Generate all the base pointers, section pointers, sizes, map types, and
7782 /// mappers for the extracted mappable expressions (all included in \a
7783 /// CombinedInfo). Also, for each item that relates with a device pointer, a
7784 /// pair of the relevant declaration and index where it occurs is appended to
7785 /// the device pointers info array.
7786 void generateAllInfoForClauses(
7787 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7788 llvm::OpenMPIRBuilder &OMPBuilder,
7789 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7790 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7791 // We have to process the component lists that relate with the same
7792 // declaration in a single chunk so that we can generate the map flags
7793 // correctly. Therefore, we organize all lists in a map.
7794 enum MapKind { Present, Allocs, Other, Total };
7795 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7796 SmallVector<SmallVector<MapInfo, 8>, 4>>
7797 Info;
7798
7799 // Helper function to fill the information map for the different supported
7800 // clauses.
7801 auto &&InfoGen =
7802 [&Info, &SkipVarSet](
7803 const ValueDecl *D, MapKind Kind,
7804 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7805 OpenMPMapClauseKind MapType,
7806 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7807 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7808 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7809 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7810 if (SkipVarSet.contains(D))
7811 return;
7812 auto It = Info.find(D);
7813 if (It == Info.end())
7814 It = Info
7815 .insert(std::make_pair(
7815 D,
7816 SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
7817 .first;
7818 It->second[Kind].emplace_back(
7819 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7820 IsImplicit, Mapper, VarRef, ForDeviceAddr);
7821 };
7822
7823 for (const auto *Cl : Clauses) {
7824 const auto *C = dyn_cast<OMPMapClause>(Cl);
7825 if (!C)
7826 continue;
7827 MapKind Kind = Other;
7828 if (llvm::is_contained(C->getMapTypeModifiers(),
7829 OMPC_MAP_MODIFIER_present))
7830 Kind = Present;
7831 else if (C->getMapType() == OMPC_MAP_alloc)
7832 Kind = Allocs;
7833 const auto *EI = C->getVarRefs().begin();
7834 for (const auto L : C->component_lists()) {
7835 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7836 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7837 C->getMapTypeModifiers(), std::nullopt,
7838 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7839 E);
7840 ++EI;
7841 }
7842 }
7843 for (const auto *Cl : Clauses) {
7844 const auto *C = dyn_cast<OMPToClause>(Cl);
7845 if (!C)
7846 continue;
7847 MapKind Kind = Other;
7848 if (llvm::is_contained(C->getMotionModifiers(),
7849 OMPC_MOTION_MODIFIER_present))
7850 Kind = Present;
7851 const auto *EI = C->getVarRefs().begin();
7852 for (const auto L : C->component_lists()) {
7853 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
7854 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7855 C->isImplicit(), std::get<2>(L), *EI);
7856 ++EI;
7857 }
7858 }
7859 for (const auto *Cl : Clauses) {
7860 const auto *C = dyn_cast<OMPFromClause>(Cl);
7861 if (!C)
7862 continue;
7863 MapKind Kind = Other;
7864 if (llvm::is_contained(C->getMotionModifiers(),
7865 OMPC_MOTION_MODIFIER_present))
7866 Kind = Present;
7867 const auto *EI = C->getVarRefs().begin();
7868 for (const auto L : C->component_lists()) {
7869 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
7870 std::nullopt, C->getMotionModifiers(),
7871 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7872 *EI);
7873 ++EI;
7874 }
7875 }
7876
7877 // Look at the use_device_ptr and use_device_addr clauses information and
7878 // mark the existing map entries as such. If there is no map information for
7879 // an entry in the use_device_ptr and use_device_addr list, we create one
7880 // with map type 'alloc' and zero size section. It is the user's fault if that
7881 // was not mapped before. If there is no map information and the pointer is
7882 // a struct member, then we defer the emission of that entry until the whole
7883 // struct has been processed.
7884 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7885 SmallVector<DeferredDevicePtrEntryTy, 4>>
7886 DeferredInfo;
7887 MapCombinedInfoTy UseDeviceDataCombinedInfo;
7888
7889 auto &&UseDeviceDataCombinedInfoGen =
7890 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7891 CodeGenFunction &CGF, bool IsDevAddr) {
7892 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7893 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7894 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7895 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7896 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7897 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7898 UseDeviceDataCombinedInfo.Sizes.push_back(
7899 llvm::Constant::getNullValue(CGF.Int64Ty));
7900 UseDeviceDataCombinedInfo.Types.push_back(
7901 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7902 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7903 };
7904
7905 auto &&MapInfoGen =
7906 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7907 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7908 OMPClauseMappableExprCommon::MappableExprComponentListRef
7909 Components,
7910 bool IsImplicit, bool IsDevAddr) {
7911 // We didn't find any match in our map information; generate a zero
7912 // size array section. If the pointer is a struct member, we defer
7913 // this action until the whole struct has been processed.
7914 if (isa<MemberExpr>(IE)) {
7915 // Insert the pointer into Info to be processed by
7916 // generateInfoForComponentList. Because it is a member pointer
7917 // without a pointee, no entry will be generated for it, therefore
7918 // we need to generate one after the whole struct has been
7919 // processed. Nonetheless, generateInfoForComponentList must be
7920 // called to take the pointer into account for the calculation of
7921 // the range of the partial struct.
7922 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7923 std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7924 nullptr, nullptr, IsDevAddr);
7925 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7926 } else {
7927 llvm::Value *Ptr;
7928 if (IsDevAddr) {
7929 if (IE->isGLValue())
7930 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7931 else
7932 Ptr = CGF.EmitScalarExpr(IE);
7933 } else {
7934 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7935 }
7936 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7937 }
7938 };
7939
7940 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7941 const Expr *IE, bool IsDevAddr) -> bool {
7942 // We potentially have map information for this declaration already.
7943 // Look for the first set of components that refer to it. If found,
7944 // return true.
7945 // If the first component is a member expression, we have to look into
7946 // 'this', which maps to null in the map of map information. Otherwise
7947 // look directly for the information.
7948 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7949 if (It != Info.end()) {
7950 bool Found = false;
7951 for (auto &Data : It->second) {
7952 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7953 return MI.Components.back().getAssociatedDeclaration() == VD;
7954 });
7955 // If we found a map entry, signal that the pointer has to be
7956 // returned and move on to the next declaration. Exclude cases where
7957 // the base pointer is mapped as array subscript, array section or
7958 // array shaping. The base address is passed as a pointer to base in
7959 // this case and cannot be used as a base for use_device_ptr list
7960 // item.
7961 if (CI != Data.end()) {
7962 if (IsDevAddr) {
7963 CI->ForDeviceAddr = IsDevAddr;
7964 CI->ReturnDevicePointer = true;
7965 Found = true;
7966 break;
7967 } else {
7968 auto PrevCI = std::next(CI->Components.rbegin());
7969 const auto *VarD = dyn_cast<VarDecl>(VD);
7970 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7971 isa<MemberExpr>(IE) ||
7972 !VD->getType().getNonReferenceType()->isPointerType() ||
7973 PrevCI == CI->Components.rend() ||
7974 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7975 VarD->hasLocalStorage()) {
7976 CI->ForDeviceAddr = IsDevAddr;
7977 CI->ReturnDevicePointer = true;
7978 Found = true;
7979 break;
7980 }
7981 }
7982 }
7983 }
7984 return Found;
7985 }
7986 return false;
7987 };
7988
7989 // Look at the use_device_ptr clause information and mark the existing map
7990 // entries as such. If there is no map information for an entry in the
7991 // use_device_ptr list, we create one with map type 'alloc' and zero size
7992 // section. It is the user's fault if that was not mapped before. If there is
7993 // no map information and the pointer is a struct member, then we defer the
7994 // emission of that entry until the whole struct has been processed.
7995 for (const auto *Cl : Clauses) {
7996 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
7997 if (!C)
7998 continue;
7999 for (const auto L : C->component_lists()) {
8000 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8001 std::get<1>(L);
8002 assert(!Components.empty() &&
8003 "Not expecting empty list of components!");
8004 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8005 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8006 const Expr *IE = Components.back().getAssociatedExpression();
8007 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8008 continue;
8009 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8010 /*IsDevAddr=*/false);
8011 }
8012 }
8013
8014 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8015 for (const auto *Cl : Clauses) {
8016 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8017 if (!C)
8018 continue;
8019 for (const auto L : C->component_lists()) {
8020 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8021 std::get<1>(L);
8022 assert(!std::get<1>(L).empty() &&
8023 "Not expecting empty list of components!");
8024 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8025 if (!Processed.insert(VD).second)
8026 continue;
8027 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8028 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8029 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8030 continue;
8031 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8032 /*IsDevAddr=*/true);
8033 }
8034 }
8035
8036 for (const auto &Data : Info) {
8037 StructRangeInfoTy PartialStruct;
8038 // Current struct information:
8039 MapCombinedInfoTy CurInfo;
8040 // Current struct base information:
8041 MapCombinedInfoTy StructBaseCurInfo;
8042 const Decl *D = Data.first;
8043 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8044 bool HasMapBasePtr = false;
8045 bool HasMapArraySec = false;
8046 if (VD && VD->getType()->isAnyPointerType()) {
8047 for (const auto &M : Data.second) {
8048 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8049 return isa_and_present<DeclRefExpr>(L.VarRef);
8050 });
8051 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8052 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8053 L.VarRef);
8054 });
8055 if (HasMapBasePtr && HasMapArraySec)
8056 break;
8057 }
8058 }
8059 for (const auto &M : Data.second) {
8060 for (const MapInfo &L : M) {
8061 assert(!L.Components.empty() &&
8062 "Not expecting declaration with no component lists.");
8063
8064 // Remember the current base pointer index.
8065 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8066 unsigned StructBasePointersIdx =
8067 StructBaseCurInfo.BasePointers.size();
8068 CurInfo.NonContigInfo.IsNonContiguous =
8069 L.Components.back().isNonContiguous();
8070 generateInfoForComponentList(
8071 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8072 CurInfo, StructBaseCurInfo, PartialStruct,
8073 /*IsFirstComponentList=*/false, L.IsImplicit,
8074 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8075 L.VarRef, /*OverlappedElements*/ std::nullopt,
8076 HasMapBasePtr && HasMapArraySec);
8077
8078 // If this entry relates to a device pointer, set the relevant
8079 // declaration and add the 'return pointer' flag.
8080 if (L.ReturnDevicePointer) {
8081 // Check whether a value was added to either CurInfo or
8082 // StructBaseCurInfo and error if no value was added to either of
8083 // them:
8084 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8085 StructBasePointersIdx <
8086 StructBaseCurInfo.BasePointers.size()) &&
8087 "Unexpected number of mapped base pointers.");
8088
8089 // Choose a base pointer index which is always valid:
8090 const ValueDecl *RelevantVD =
8091 L.Components.back().getAssociatedDeclaration();
8092 assert(RelevantVD &&
8093 "No relevant declaration related with device pointer??");
8094
8095 // If StructBaseCurInfo has been updated this iteration then work on
8096 // the first new entry added to it i.e. make sure that when multiple
8097 // values are added to any of the lists, the first value added is
8098 // being modified by the assignments below (not the last value
8099 // added).
8100 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8101 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8102 RelevantVD;
8103 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8104 L.ForDeviceAddr ? DeviceInfoTy::Address
8105 : DeviceInfoTy::Pointer;
8106 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8107 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8108 } else {
8109 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8110 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8111 L.ForDeviceAddr ? DeviceInfoTy::Address
8112 : DeviceInfoTy::Pointer;
8113 CurInfo.Types[CurrentBasePointersIdx] |=
8114 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8115 }
8116 }
8117 }
8118 }
8119
8120 // Append any pending zero-length pointers which are struct members and
8121 // used with use_device_ptr or use_device_addr.
8122 auto CI = DeferredInfo.find(Data.first);
8123 if (CI != DeferredInfo.end()) {
8124 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8125 llvm::Value *BasePtr;
8126 llvm::Value *Ptr;
8127 if (L.ForDeviceAddr) {
8128 if (L.IE->isGLValue())
8129 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8130 else
8131 Ptr = this->CGF.EmitScalarExpr(L.IE);
8132 BasePtr = Ptr;
8133 // Entry is RETURN_PARAM. Also, set the placeholder value
8134 // MEMBER_OF=FFFF so that the entry is later updated with the
8135 // correct value of MEMBER_OF.
8136 CurInfo.Types.push_back(
8137 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8138 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8139 } else {
8140 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8141 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8142 L.IE->getExprLoc());
8143 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8144 // placeholder value MEMBER_OF=FFFF so that the entry is later
8145 // updated with the correct value of MEMBER_OF.
8146 CurInfo.Types.push_back(
8147 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8148 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8149 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8150 }
8151 CurInfo.Exprs.push_back(L.VD);
8152 CurInfo.BasePointers.emplace_back(BasePtr);
8153 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8154 CurInfo.DevicePointers.emplace_back(
8155 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8156 CurInfo.Pointers.push_back(Ptr);
8157 CurInfo.Sizes.push_back(
8158 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8159 CurInfo.Mappers.push_back(nullptr);
8160 }
8161 }
8162
8163 // Unify entries in one list making sure the struct mapping precedes the
8164 // individual fields:
8165 MapCombinedInfoTy UnionCurInfo;
8166 UnionCurInfo.append(StructBaseCurInfo);
8167 UnionCurInfo.append(CurInfo);
8168
8169 // If there is an entry in PartialStruct it means we have a struct with
8170 // individual members mapped. Emit an extra combined entry.
8171 if (PartialStruct.Base.isValid()) {
8172 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8173 // Emit a combined entry:
8174 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8175 /*IsMapThis*/ !VD, OMPBuilder, VD);
8176 }
8177
8178 // We need to append the results of this capture to what we already have.
8179 CombinedInfo.append(UnionCurInfo);
8180 }
8181 // Append data for use_device_ptr clauses.
8182 CombinedInfo.append(UseDeviceDataCombinedInfo);
8183 }
8184
8185public:
8186 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8187 : CurDir(&Dir), CGF(CGF) {
8188 // Extract firstprivate clause information.
8189 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8190 for (const auto *D : C->varlists())
8191 FirstPrivateDecls.try_emplace(
8192 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8193 // Extract implicit firstprivates from uses_allocators clauses.
8194 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8195 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8196 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8197 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8198 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8199 /*Implicit=*/true);
8200 else if (const auto *VD = dyn_cast<VarDecl>(
8201 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8202 ->getDecl()))
8203 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8204 }
8205 }
8206 // Extract device pointer clause information.
8207 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8208 for (auto L : C->component_lists())
8209 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8210 // Extract device addr clause information.
8211 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8212 for (auto L : C->component_lists())
8213 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8214 // Extract map information.
8215 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8216 if (C->getMapType() != OMPC_MAP_to)
8217 continue;
8218 for (auto L : C->component_lists()) {
8219 const ValueDecl *VD = std::get<0>(L);
8220 const auto *RD = VD ? VD->getType()
8221 .getCanonicalType()
8222 .getNonReferenceType()
8223 ->getAsCXXRecordDecl()
8224 : nullptr;
8225 if (RD && RD->isLambda())
8226 LambdasMap.try_emplace(std::get<0>(L), C);
8227 }
8228 }
8229 }
8230
8231 /// Constructor for the declare mapper directive.
8232 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8233 : CurDir(&Dir), CGF(CGF) {}
8234
8235 /// Generate code for the combined entry if we have a partially mapped struct
8236 /// and take care of the mapping flags of the arguments corresponding to
8237 /// individual struct members.
8238 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8239 MapFlagsArrayTy &CurTypes,
8240 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8241 llvm::OpenMPIRBuilder &OMPBuilder,
8242 const ValueDecl *VD = nullptr,
8243 bool NotTargetParams = true) const {
8244 if (CurTypes.size() == 1 &&
8245 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8246 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8247 !PartialStruct.IsArraySection)
8248 return;
8249 Address LBAddr = PartialStruct.LowestElem.second;
8250 Address HBAddr = PartialStruct.HighestElem.second;
8251 if (PartialStruct.HasCompleteRecord) {
8252 LBAddr = PartialStruct.LB;
8253 HBAddr = PartialStruct.LB;
8254 }
8255 CombinedInfo.Exprs.push_back(VD);
8256 // Base is the base of the struct
8257 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8258 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8259 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8260 // Pointer is the address of the lowest element
8261 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8262 const CXXMethodDecl *MD =
8263 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8264 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8265 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8266 // There should not be a mapper for a combined entry.
8267 if (HasBaseClass) {
8268 // OpenMP 5.2 148:21:
8269 // If the target construct is within a class non-static member function,
8270 // and a variable is an accessible data member of the object for which the
8271 // non-static data member function is invoked, the variable is treated as
8272 // if the this[:1] expression had appeared in a map clause with a map-type
8273 // of tofrom.
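// Illustrative case (not from the source): inside
// 'void S::f() { #pragma omp target map(X) {...} }' where X is a data
// member of S, the enclosing object is mapped as if
// 'map(tofrom: this[:1])' had been written.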
8274 // Emit this[:1]
8275 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8276 QualType Ty = MD->getFunctionObjectParameterType();
8277 llvm::Value *Size =
8278 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8279 /*isSigned=*/true);
8280 CombinedInfo.Sizes.push_back(Size);
8281 } else {
8282 CombinedInfo.Pointers.push_back(LB);
8283 // Size is (addr of {highest+1} element) - (addr of lowest element)
8284 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8285 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8286 HBAddr.getElementType(), HB, /*Idx0=*/1);
8287 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8288 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8289 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8290 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8291 /*isSigned=*/false);
8292 CombinedInfo.Sizes.push_back(Size);
8293 }
8294 CombinedInfo.Mappers.push_back(nullptr);
8295 // The map type is TARGET_PARAM only when generating info for captures.
8296 CombinedInfo.Types.push_back(
8297 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8298 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8299 // If any element has the present modifier, then make sure the runtime
8300 // doesn't attempt to allocate the struct.
8301 if (CurTypes.end() !=
8302 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8303 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8304 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8305 }))
8306 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8307 // Remove TARGET_PARAM flag from the first element
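// (The combined struct entry emitted above now represents the mapping as
// a whole, so the first member entry must not also claim to be the
// kernel argument.)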
8308 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8309 // If any element has the ompx_hold modifier, then make sure the runtime
8310 // uses the hold reference count for the struct as a whole so that it won't
8311 // be unmapped by an extra dynamic reference count decrement. Add it to all
8312 // elements as well so the runtime knows which reference count to check
8313 // when determining whether it's time for device-to-host transfers of
8314 // individual elements.
8315 if (CurTypes.end() !=
8316 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8317 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8318 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8319 })) {
8320 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8321 for (auto &M : CurTypes)
8322 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8323 }
8324
8325 // All other current entries will be MEMBER_OF the combined entry
8326 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8327 // 0xFFFF in the MEMBER_OF field).
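// getMemberOfFlag places the combined entry's one-based index into the
// MEMBER_OF bits, i.e. the high 16 bits of the flag word.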
8328 OpenMPOffloadMappingFlags MemberOfFlag =
8329 OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8330 for (auto &M : CurTypes)
8331 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8332 }
8333
8334 /// Generate all the base pointers, section pointers, sizes, map types, and
8335 /// mappers for the extracted mappable expressions (all included in \a
8336 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8337 /// pair of the relevant declaration and index where it occurs is appended to
8338 /// the device pointers info array.
8339 void generateAllInfo(
8340 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8341 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8342 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8343 assert(CurDir.is<const OMPExecutableDirective *>() &&
8344 "Expect an executable directive");
8345 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8346 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8347 SkipVarSet);
8348 }
8349
8350 /// Generate all the base pointers, section pointers, sizes, map types, and
8351 /// mappers for the extracted map clauses of user-defined mapper (all included
8352 /// in \a CombinedInfo).
8353 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8354 llvm::OpenMPIRBuilder &OMPBuilder) const {
8355 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8356 "Expect a declare mapper directive");
8357 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8358 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8359 OMPBuilder);
8360 }
8361
8362 /// Emit capture info for lambdas for variables captured by reference.
8363 void generateInfoForLambdaCaptures(
8364 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8365 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8366 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8367 const auto *RD = VDType->getAsCXXRecordDecl();
8368 if (!RD || !RD->isLambda())
8369 return;
8370 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8371 CGF.getContext().getDeclAlign(VD));
8372 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8373 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8374 FieldDecl *ThisCapture = nullptr;
8375 RD->getCaptureFields(Captures, ThisCapture);
8376 if (ThisCapture) {
8377 LValue ThisLVal =
8378 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8379 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8380 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8381 VDLVal.getPointer(CGF));
8382 CombinedInfo.Exprs.push_back(VD);
8383 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8384 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8385 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8386 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8387 CombinedInfo.Sizes.push_back(
8388 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8389 CGF.Int64Ty, /*isSigned=*/true));
8390 CombinedInfo.Types.push_back(
8391 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8392 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8393 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8394 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8395 CombinedInfo.Mappers.push_back(nullptr);
8396 }
8397 for (const LambdaCapture &LC : RD->captures()) {
8398 if (!LC.capturesVariable())
8399 continue;
8400 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8401 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8402 continue;
8403 auto It = Captures.find(VD);
8404 assert(It != Captures.end() && "Found lambda capture without field.");
8405 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8406 if (LC.getCaptureKind() == LCK_ByRef) {
8407 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8408 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8409 VDLVal.getPointer(CGF));
8410 CombinedInfo.Exprs.push_back(VD);
8411 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8412 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8413 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8414 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8415 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8416 CGF.getTypeSize(
8417 VD->getType().getCanonicalType().getNonReferenceType()),
8418 CGF.Int64Ty, /*isSigned=*/true));
8419 } else {
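// A pointer captured by value is attached as a zero-length array
// section: the loaded pointer value becomes the section base and the
// size is 0.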
8420 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8421 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8422 VDLVal.getPointer(CGF));
8423 CombinedInfo.Exprs.push_back(VD);
8424 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8425 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8426 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8427 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8428 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8429 }
8430 CombinedInfo.Types.push_back(
8431 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8432 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8433 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8434 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8435 CombinedInfo.Mappers.push_back(nullptr);
8436 }
8437 }
8438
8439 /// Set correct indices for lambdas captures.
8440 void adjustMemberOfForLambdaCaptures(
8441 llvm::OpenMPIRBuilder &OMPBuilder,
8442 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8443 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8444 MapFlagsArrayTy &Types) const {
8445 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8446 // Set correct member_of idx for all implicit lambda captures.
8447 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8448 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8449 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8450 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8451 continue;
8452 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8453 assert(BasePtr && "Unable to find base lambda address.");
8454 int TgtIdx = -1;
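// Search backwards for the entry whose pointer is the lambda object
// itself; that entry is the parent this capture is a member of.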
8455 for (unsigned J = I; J > 0; --J) {
8456 unsigned Idx = J - 1;
8457 if (Pointers[Idx] != BasePtr)
8458 continue;
8459 TgtIdx = Idx;
8460 break;
8461 }
8462 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8463 // All other current entries will be MEMBER_OF the combined entry
8464 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8465 // 0xFFFF in the MEMBER_OF field).
8466 OpenMPOffloadMappingFlags MemberOfFlag =
8467 OMPBuilder.getMemberOfFlag(TgtIdx);
8468 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8469 }
8470 }
8471
8472 /// Generate the base pointers, section pointers, sizes, map types, and
8473 /// mappers associated to a given capture (all included in \a CombinedInfo).
8474 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8475 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8476 StructRangeInfoTy &PartialStruct) const {
8477 assert(!Cap->capturesVariableArrayType() &&
8478 "Not expecting to generate map info for a variable array type!");
8479
8480 // We need to know when we are generating information for the first component.
8481 const ValueDecl *VD = Cap->capturesThis()
8482 ? nullptr
8483 : Cap->getCapturedVar()->getCanonicalDecl();
8484
8485 // For map(to: lambda): skip here; it is processed in
8486 // generateDefaultMapInfo.
8487 if (LambdasMap.count(VD))
8488 return;
8489
8490 // If this declaration appears in an is_device_ptr clause, we just have to
8491 // pass the pointer by value. If it is a reference to a declaration, we just
8492 // pass its value.
8493 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8494 CombinedInfo.Exprs.push_back(VD);
8495 CombinedInfo.BasePointers.emplace_back(Arg);
8496 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8497 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8498 CombinedInfo.Pointers.push_back(Arg);
8499 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8500 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8501 /*isSigned=*/true));
8502 CombinedInfo.Types.push_back(
8503 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8504 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8505 CombinedInfo.Mappers.push_back(nullptr);
8506 return;
8507 }
8508
8509 using MapData =
8510 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8511 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8512 const ValueDecl *, const Expr *>;
8513 SmallVector<MapData, 4> DeclComponentLists;
8514 // For member field lists in is_device_ptr, store them in
8515 // DeclComponentLists for generating component info.
8516 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8517 auto It = DevPointersMap.find(VD);
8518 if (It != DevPointersMap.end())
8519 for (const auto &MCL : It->second)
8520 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8521 /*IsImplicit=*/true, nullptr,
8522 nullptr);
8523 auto I = HasDevAddrsMap.find(VD);
8524 if (I != HasDevAddrsMap.end())
8525 for (const auto &MCL : I->second)
8526 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8527 /*IsImplicit=*/true, nullptr,
8528 nullptr);
8529 assert(CurDir.is<const OMPExecutableDirective *>() &&
8530 "Expect a executable directive");
8531 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8532 bool HasMapBasePtr = false;
8533 bool HasMapArraySec = false;
8534 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8535 const auto *EI = C->getVarRefs().begin();
8536 for (const auto L : C->decl_component_lists(VD)) {
8537 const ValueDecl *VDecl, *Mapper;
8538 // The expression is not valid if the mapping is implicit.
8539 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8540 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8541 std::tie(VDecl, Components, Mapper) = L;
8542 assert(VDecl == VD && "We got information for the wrong declaration??");
8543 assert(!Components.empty() &&
8544 "Not expecting declaration with no component lists.");
8545 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8546 HasMapBasePtr = true;
8547 if (VD && E && VD->getType()->isAnyPointerType() &&
8548 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8549 HasMapArraySec = true;
8550 DeclComponentLists.emplace_back(Components, C->getMapType(),
8551 C->getMapTypeModifiers(),
8552 C->isImplicit(), Mapper, E);
8553 ++EI;
8554 }
8555 }
8556 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8557 const MapData &RHS) {
8558 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8559 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8560 bool HasPresent =
8561 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8562 bool HasAllocs = MapType == OMPC_MAP_alloc;
8563 MapModifiers = std::get<2>(RHS);
8564 MapType = std::get<1>(LHS);
8565 bool HasPresentR =
8566 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8567 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8568 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8569 });
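// Net effect of the comparator above: lists carrying the 'present'
// modifier sort toward the front and 'alloc' maps sort toward the back.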
8570
8571 // Find overlapping elements (including the offset from the base element).
8572 llvm::SmallDenseMap<
8573 const MapData *,
8574 llvm::SmallVector<
8575 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8576 4>
8577 OverlappedData;
8578 size_t Count = 0;
8579 for (const MapData &L : DeclComponentLists) {
8580 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8581 OpenMPMapClauseKind MapType;
8582 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8583 bool IsImplicit;
8584 const ValueDecl *Mapper;
8585 const Expr *VarRef;
8586 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8587 L;
8588 ++Count;
8589 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8590 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8591 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8592 VarRef) = L1;
8593 auto CI = Components.rbegin();
8594 auto CE = Components.rend();
8595 auto SI = Components1.rbegin();
8596 auto SE = Components1.rend();
8597 for (; CI != CE && SI != SE; ++CI, ++SI) {
8598 if (CI->getAssociatedExpression()->getStmtClass() !=
8599 SI->getAssociatedExpression()->getStmtClass())
8600 break;
8601 // Are we dealing with different variables/fields?
8602 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8603 break;
8604 }
8605 // Found overlapping if, at least for one component, reached the head
8606 // of the components list.
8607 if (CI == CE || SI == SE) {
8608 // Ignore it if it is the same component.
8609 if (CI == CE && SI == SE)
8610 continue;
8611 const auto It = (SI == SE) ? CI : SI;
8612 // If one component is a pointer and another one is a kind of
8613 // dereference of this pointer (array subscript, section, dereference,
8614 // etc.), it is not an overlap.
8615 // Likewise, if one component is a base and another component is a
8616 // dereferenced pointer member expression with the same base.
8617 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8618 (std::prev(It)->getAssociatedDeclaration() &&
8619 std::prev(It)
8620 ->getAssociatedDeclaration()
8621 ->getType()
8622 ->isPointerType()) ||
8623 (It->getAssociatedDeclaration() &&
8624 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8625 std::next(It) != CE && std::next(It) != SE))
8626 continue;
8627 const MapData &BaseData = CI == CE ? L : L1;
8628 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8629 SI == SE ? Components : Components1;
8630 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8631 OverlappedElements.getSecond().push_back(SubData);
8632 }
8633 }
8634 }
8635 // Sort the overlapped elements for each item.
8636 llvm::SmallVector<const FieldDecl *, 4> Layout;
8637 if (!OverlappedData.empty()) {
8638 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8639 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8640 while (BaseType != OrigType) {
8641 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8642 OrigType = BaseType->getPointeeOrArrayElementType();
8643 }
8644
8645 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8646 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8647 else {
8648 const auto *RD = BaseType->getAsRecordDecl();
8649 Layout.append(RD->field_begin(), RD->field_end());
8650 }
8651 }
8652 for (auto &Pair : OverlappedData) {
8653 llvm::stable_sort(
8654 Pair.getSecond(),
8655 [&Layout](
8656 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8657 OMPClauseMappableExprCommon::MappableExprComponentListRef
8658 Second) {
8659 auto CI = First.rbegin();
8660 auto CE = First.rend();
8661 auto SI = Second.rbegin();
8662 auto SE = Second.rend();
8663 for (; CI != CE && SI != SE; ++CI, ++SI) {
8664 if (CI->getAssociatedExpression()->getStmtClass() !=
8665 SI->getAssociatedExpression()->getStmtClass())
8666 break;
8667 // Are we dealing with different variables/fields?
8668 if (CI->getAssociatedDeclaration() !=
8669 SI->getAssociatedDeclaration())
8670 break;
8671 }
8672
8673 // Lists contain the same elements.
8674 if (CI == CE && SI == SE)
8675 return false;
8676
8677 // A list with fewer elements is less than a list with more elements.
8678 if (CI == CE || SI == SE)
8679 return CI == CE;
8680
8681 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8682 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8683 if (FD1->getParent() == FD2->getParent())
8684 return FD1->getFieldIndex() < FD2->getFieldIndex();
8685 const auto *It =
8686 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8687 return FD == FD1 || FD == FD2;
8688 });
8689 return *It == FD1;
8690 });
8691 }
8692
8693 // Associated with a capture, because the mapping flags depend on it.
8694 // First, go through all of the elements that have overlapped elements.
8695 bool IsFirstComponentList = true;
8696 MapCombinedInfoTy StructBaseCombinedInfo;
8697 for (const auto &Pair : OverlappedData) {
8698 const MapData &L = *Pair.getFirst();
8699 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8700 OpenMPMapClauseKind MapType;
8701 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8702 bool IsImplicit;
8703 const ValueDecl *Mapper;
8704 const Expr *VarRef;
8705 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8706 L;
8707 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8708 OverlappedComponents = Pair.getSecond();
8709 generateInfoForComponentList(
8710 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8711 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8712 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8713 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8714 IsFirstComponentList = false;
8715 }
8716 // Go through other elements without overlapped elements.
8717 for (const MapData &L : DeclComponentLists) {
8718 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8719 OpenMPMapClauseKind MapType;
8720 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8721 bool IsImplicit;
8722 const ValueDecl *Mapper;
8723 const Expr *VarRef;
8724 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8725 L;
8726 auto It = OverlappedData.find(&L);
8727 if (It == OverlappedData.end())
8728 generateInfoForComponentList(
8729 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8730 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8731 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8732 /*ForDeviceAddr=*/false, VD, VarRef,
8733 /*OverlappedElements*/ std::nullopt,
8734 HasMapBasePtr && HasMapArraySec);
8735 IsFirstComponentList = false;
8736 }
8737 }
8738
8739 /// Generate the default map information for a given capture \a CI,
8740 /// record field declaration \a RI and captured value \a CV.
8741 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8742 const FieldDecl &RI, llvm::Value *CV,
8743 MapCombinedInfoTy &CombinedInfo) const {
8744 bool IsImplicit = true;
8745 // Do the default mapping.
8746 if (CI.capturesThis()) {
8747 CombinedInfo.Exprs.push_back(nullptr);
8748 CombinedInfo.BasePointers.push_back(CV);
8749 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8750 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8751 CombinedInfo.Pointers.push_back(CV);
8752 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8753 CombinedInfo.Sizes.push_back(
8754 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8755 CGF.Int64Ty, /*isSigned=*/true));
8756 // Default map type.
8757 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8758 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8759 } else if (CI.capturesVariableByCopy()) {
8760 const VarDecl *VD = CI.getCapturedVar();
8761 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8762 CombinedInfo.BasePointers.push_back(CV);
8763 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8764 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8765 CombinedInfo.Pointers.push_back(CV);
8766 if (!RI.getType()->isAnyPointerType()) {
8767 // We have to signal to the runtime those captures passed by value
8768 // that are not pointers.
8769 CombinedInfo.Types.push_back(
8770 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8771 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8772 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8773 } else {
8774 // Pointers are implicitly mapped with a zero size and no flags
8775 // (other than first map that is added for all implicit maps).
8776 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8777 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8778 }
8779 auto I = FirstPrivateDecls.find(VD);
8780 if (I != FirstPrivateDecls.end())
8781 IsImplicit = I->getSecond();
8782 } else {
8783 assert(CI.capturesVariable() && "Expected captured reference.");
8784 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8785 QualType ElementType = PtrTy->getPointeeType();
8786 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8787 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8788 // The default map type for a scalar/complex type is 'to' because by
8789 // default the value doesn't have to be retrieved. For an aggregate
8790 // type, the default is 'tofrom'.
8791 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8792 const VarDecl *VD = CI.getCapturedVar();
8793 auto I = FirstPrivateDecls.find(VD);
8794 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8795 CombinedInfo.BasePointers.push_back(CV);
8796 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8797 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8798 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8799 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8800 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8801 AlignmentSource::Decl));
8802 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
8803 } else {
8804 CombinedInfo.Pointers.push_back(CV);
8805 }
8806 if (I != FirstPrivateDecls.end())
8807 IsImplicit = I->getSecond();
8808 }
8809 // Every default map produces a single argument which is a target parameter.
8810 CombinedInfo.Types.back() |=
8811 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8812
8813 // Add flag stating this is an implicit map.
8814 if (IsImplicit)
8815 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8816
8817 // No user-defined mapper for default mapping.
8818 CombinedInfo.Mappers.push_back(nullptr);
8819 }
8820};
8821} // anonymous namespace
8822
8823// Try to extract the base declaration from a `this->x` expression if possible.
8824 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
8825 if (!E)
8826 return nullptr;
8827
8828 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
8829 if (const MemberExpr *ME =
8830 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8831 return ME->getMemberDecl();
8832 return nullptr;
8833}
8834
8835/// Emit a string constant containing the names of the values mapped to the
8836/// offloading runtime library.
8837llvm::Constant *
8838emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8839 MappableExprsHandler::MappingExprInfo &MapExprs) {
8840
8841 uint32_t SrcLocStrSize;
8842 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8843 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8844
8845 SourceLocation Loc;
8846 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8847 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8848 Loc = VD->getLocation();
8849 else
8850 Loc = MapExprs.getMapExpr()->getExprLoc();
8851 } else {
8852 Loc = MapExprs.getMapDecl()->getLocation();
8853 }
8854
8855 std::string ExprName;
8856 if (MapExprs.getMapExpr()) {
8857 PrintingPolicy P(CGF.getContext().getLangOpts());
8858 llvm::raw_string_ostream OS(ExprName);
8859 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8860 OS.flush();
8861 } else {
8862 ExprName = MapExprs.getMapDecl()->getNameAsString();
8863 }
8864
8865 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8866 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8867 PLoc.getLine(), PLoc.getColumn(),
8868 SrcLocStrSize);
8869}
8870
8871/// Emit the arrays used to pass the captures and map information to the
8872/// offloading runtime library. If there is no map or capture information,
8873/// return nullptr by reference.
8874 static void emitOffloadingArrays(
8875 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8876 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8877 bool IsNonContiguous = false) {
8878 CodeGenModule &CGM = CGF.CGM;
8879
8880 // Reset the array information.
8881 Info.clearArrayInfo();
8882 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8883
8884 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8885 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8886 CGF.AllocaInsertPt->getIterator());
8887 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8888 CGF.Builder.GetInsertPoint());
8889
8890 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
8891 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
8892 };
8893 if (CGM.getCodeGenOpts().getDebugInfo() !=
8894 llvm::codegenoptions::NoDebugInfo) {
8895 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
8896 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
8897 FillInfoMap);
8898 }
8899
8900 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8901 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8902 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8903 }
8904 };
8905
8906 auto CustomMapperCB = [&](unsigned int I) {
8907 llvm::Value *MFunc = nullptr;
8908 if (CombinedInfo.Mappers[I]) {
8909 Info.HasMapper = true;
8910 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8911 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8912 }
8913 return MFunc;
8914 };
8915 OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8916 IsNonContiguous, DeviceAddrCB,
8917 CustomMapperCB);
8918}
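// Conceptual sketch (exposition only, not emitted verbatim): for a construct
// mapping two items, the combined information filled in above corresponds to
// parallel arrays handed to the offloading runtime, roughly:
//
//   void *BasePtrs[2] = {&a, &b};        // base address of each entity
//   void *Ptrs[2]     = {&a[0], &b[0]};  // begin address of each section
//   int64_t Sizes[2]  = {n * sizeof a[0], sizeof b};
//   int64_t Types[2]  = {OMP_MAP_TO, OMP_MAP_TOFROM};
//   void *Mappers[2]  = {nullptr, nullptr};  // user-defined mappers, if any
//
// The Names array is populated only when debug info is enabled, as the
// transform over CombinedInfo.Exprs above shows.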
8919
8920/// Check for inner distribute directive.
8921static const OMPExecutableDirective *
8922 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8923 const auto *CS = D.getInnermostCapturedStmt();
8924 const auto *Body =
8925 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8926 const Stmt *ChildStmt =
8927 CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8928
8929 if (const auto *NestedDir =
8930 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8931 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8932 switch (D.getDirectiveKind()) {
8933 case OMPD_target:
8934 // For now, treat 'target' with nested 'teams loop' as if it's
8935 // distributed (target teams distribute).
8936 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8937 return NestedDir;
8938 if (DKind == OMPD_teams) {
8939 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8940 /*IgnoreCaptured=*/true);
8941 if (!Body)
8942 return nullptr;
8943 ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8944 if (const auto *NND =
8945 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8946 DKind = NND->getDirectiveKind();
8947 if (isOpenMPDistributeDirective(DKind))
8948 return NND;
8949 }
8950 }
8951 return nullptr;
8952 case OMPD_target_teams:
8953 if (isOpenMPDistributeDirective(DKind))
8954 return NestedDir;
8955 return nullptr;
8956 case OMPD_target_parallel:
8957 case OMPD_target_simd:
8958 case OMPD_target_parallel_for:
8959 case OMPD_target_parallel_for_simd:
8960 return nullptr;
8961 case OMPD_target_teams_distribute:
8962 case OMPD_target_teams_distribute_simd:
8963 case OMPD_target_teams_distribute_parallel_for:
8964 case OMPD_target_teams_distribute_parallel_for_simd:
8965 case OMPD_parallel:
8966 case OMPD_for:
8967 case OMPD_parallel_for:
8968 case OMPD_parallel_master:
8969 case OMPD_parallel_sections:
8970 case OMPD_for_simd:
8971 case OMPD_parallel_for_simd:
8972 case OMPD_cancel:
8973 case OMPD_cancellation_point:
8974 case OMPD_ordered:
8975 case OMPD_threadprivate:
8976 case OMPD_allocate:
8977 case OMPD_task:
8978 case OMPD_simd:
8979 case OMPD_tile:
8980 case OMPD_unroll:
8981 case OMPD_sections:
8982 case OMPD_section:
8983 case OMPD_single:
8984 case OMPD_master:
8985 case OMPD_critical:
8986 case OMPD_taskyield:
8987 case OMPD_barrier:
8988 case OMPD_taskwait:
8989 case OMPD_taskgroup:
8990 case OMPD_atomic:
8991 case OMPD_flush:
8992 case OMPD_depobj:
8993 case OMPD_scan:
8994 case OMPD_teams:
8995 case OMPD_target_data:
8996 case OMPD_target_exit_data:
8997 case OMPD_target_enter_data:
8998 case OMPD_distribute:
8999 case OMPD_distribute_simd:
9000 case OMPD_distribute_parallel_for:
9001 case OMPD_distribute_parallel_for_simd:
9002 case OMPD_teams_distribute:
9003 case OMPD_teams_distribute_simd:
9004 case OMPD_teams_distribute_parallel_for:
9005 case OMPD_teams_distribute_parallel_for_simd:
9006 case OMPD_target_update:
9007 case OMPD_declare_simd:
9008 case OMPD_declare_variant:
9009 case OMPD_begin_declare_variant:
9010 case OMPD_end_declare_variant:
9011 case OMPD_declare_target:
9012 case OMPD_end_declare_target:
9013 case OMPD_declare_reduction:
9014 case OMPD_declare_mapper:
9015 case OMPD_taskloop:
9016 case OMPD_taskloop_simd:
9017 case OMPD_master_taskloop:
9018 case OMPD_master_taskloop_simd:
9019 case OMPD_parallel_master_taskloop:
9020 case OMPD_parallel_master_taskloop_simd:
9021 case OMPD_requires:
9022 case OMPD_metadirective:
9023 case OMPD_unknown:
9024 default:
9025 llvm_unreachable("Unexpected directive.");
9026 }
9027 }
9028
9029 return nullptr;
9030}
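// Illustrative examples (not part of the original source): given
//
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int i = 0; i < n; ++i)
//     body(i);
//
// the OMPD_target case drills through the nested 'teams' region and returns
// the 'distribute parallel for' directive, while for
//
//   #pragma omp target teams
//   #pragma omp distribute
//   for (int i = 0; i < n; ++i)
//     body(i);
//
// the OMPD_target_teams case returns the nested 'distribute' directly.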
9031
9032/// Emit the user-defined mapper function. The code generation follows the
9033/// pattern in the example below.
9034/// \code
9035/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9036/// void *base, void *begin,
9037/// int64_t size, int64_t type,
9038/// void *name = nullptr) {
9039/// // Allocate space for an array section first or add a base/begin for
9040/// // pointer dereference.
9041/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9042/// !maptype.IsDelete)
9043/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9044/// size*sizeof(Ty), clearToFromMember(type));
9045/// // Map members.
9046/// for (unsigned i = 0; i < size; i++) {
9047/// // For each component specified by this mapper:
9048/// for (auto c : begin[i]->all_components) {
9049/// if (c.hasMapper())
9050/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9051/// c.arg_type, c.arg_name);
9052/// else
9053/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9054/// c.arg_begin, c.arg_size, c.arg_type,
9055/// c.arg_name);
9056/// }
9057/// }
9058/// // Delete the array section.
9059/// if (size > 1 && maptype.IsDelete)
9060/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9061/// size*sizeof(Ty), clearToFromMember(type));
9062/// }
9063/// \endcode
9064 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9065 CodeGenFunction *CGF) {
9066 if (UDMMap.count(D) > 0)
9067 return;
9068 ASTContext &C = CGM.getContext();
9069 QualType Ty = D->getType();
9070 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9071 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9072 auto *MapperVarDecl =
9073 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9074 SourceLocation Loc = D->getLocation();
9075 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9076 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9077
9078 // Prepare mapper function arguments and attributes.
9079 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9080 C.VoidPtrTy, ImplicitParamKind::Other);
9081 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9082 ImplicitParamKind::Other);
9083 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9084 C.VoidPtrTy, ImplicitParamKind::Other);
9085 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9086 ImplicitParamKind::Other);
9087 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9088 ImplicitParamKind::Other);
9089 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9090 ImplicitParamKind::Other);
9091 FunctionArgList Args;
9092 Args.push_back(&HandleArg);
9093 Args.push_back(&BaseArg);
9094 Args.push_back(&BeginArg);
9095 Args.push_back(&SizeArg);
9096 Args.push_back(&TypeArg);
9097 Args.push_back(&NameArg);
9098 const CGFunctionInfo &FnInfo =
9099 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9100 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9101 SmallString<64> TyStr;
9102 llvm::raw_svector_ostream Out(TyStr);
9103 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9104 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9105 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9106 Name, &CGM.getModule());
9107 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9108 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9109 // Start the mapper function code generation.
9110 CodeGenFunction MapperCGF(CGM);
9111 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9112 // Compute the starting and end addresses of array elements.
9113 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9114 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9115 C.getPointerType(Int64Ty), Loc);
9116 // Prepare common arguments for array initialization and deletion.
9117 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9118 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9119 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9120 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9121 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9122 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9123 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9124 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9125 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9126 // Convert the size in bytes into the number of array elements.
9127 Size = MapperCGF.Builder.CreateExactUDiv(
9128 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9129 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9130 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9131 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9132 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9133 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9134 C.getPointerType(Int64Ty), Loc);
9135 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9136 MapperCGF.GetAddrOfLocalVar(&NameArg),
9137 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9138
9139 // Emit array initialization if this is an array section and \p MapType indicates
9140 // that memory allocation is required.
9141 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9142 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9143 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9144
9145 // Emit a for loop that iterates through SizeArg elements and maps each of them.
9146
9147 // Emit the loop header block.
9148 MapperCGF.EmitBlock(HeadBB);
9149 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9150 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9151 // Evaluate whether the initial condition is satisfied.
9152 llvm::Value *IsEmpty =
9153 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9154 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9155 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9156
9157 // Emit the loop body block.
9158 MapperCGF.EmitBlock(BodyBB);
9159 llvm::BasicBlock *LastBB = BodyBB;
9160 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9161 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9162 PtrPHI->addIncoming(PtrBegin, EntryBB);
9163 Address PtrCurrent(PtrPHI, ElemTy,
9164 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9165 .getAlignment()
9166 .alignmentOfArrayElement(ElementSize));
9167 // Privatize the declared variable of mapper to be the current array element.
9168 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9169 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9170 (void)Scope.Privatize();
9171
9172 // Get map clause information. Fill up the arrays with all mapped variables.
9173 MappableExprsHandler::MapCombinedInfoTy Info;
9174 MappableExprsHandler MEHandler(*D, MapperCGF);
9175 MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9176
9177 // Call the runtime API __tgt_mapper_num_components to get the number of
9178 // pre-existing components.
9179 llvm::Value *OffloadingArgs[] = {Handle};
9180 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9181 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9182 OMPRTL___tgt_mapper_num_components),
9183 OffloadingArgs);
9184 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9185 PreviousSize,
9186 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9187
9188 // Fill up the runtime mapper handle for all components.
9189 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9190 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9191 Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9192 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9193 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9194 llvm::Value *CurSizeArg = Info.Sizes[I];
9195 llvm::Value *CurNameArg =
9196 (CGM.getCodeGenOpts().getDebugInfo() ==
9197 llvm::codegenoptions::NoDebugInfo)
9198 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9199 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9200
9201 // Extract the MEMBER_OF field from the map type.
9202 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9203 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9204 Info.Types[I]));
9205 llvm::Value *MemberMapType =
9206 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9207
9208 // Combine the map type inherited from user-defined mapper with that
9209 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9210 // bits of the \a MapType, which is the input argument of the mapper
9211 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9212 // bits of MemberMapType.
9213 // [OpenMP 5.0], 1.2.6. map-type decay.
9214 // | alloc | to | from | tofrom | release | delete
9215 // ----------------------------------------------------------
9216 // alloc | alloc | alloc | alloc | alloc | release | delete
9217 // to | alloc | to | alloc | to | release | delete
9218 // from | alloc | alloc | from | from | release | delete
9219 // tofrom | alloc | to | from | tofrom | release | delete
9220 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9221 MapType,
9222 MapperCGF.Builder.getInt64(
9223 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9224 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9225 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9226 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9227 llvm::BasicBlock *AllocElseBB =
9228 MapperCGF.createBasicBlock("omp.type.alloc.else");
9229 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9230 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9231 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9232 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9233 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9234 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9235 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9236 MapperCGF.EmitBlock(AllocBB);
9237 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9238 MemberMapType,
9239 MapperCGF.Builder.getInt64(
9240 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9241 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9242 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9243 MapperCGF.Builder.CreateBr(EndBB);
9244 MapperCGF.EmitBlock(AllocElseBB);
9245 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9246 LeftToFrom,
9247 MapperCGF.Builder.getInt64(
9248 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9249 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9250 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9251 // In case of to, clear OMP_MAP_FROM.
9252 MapperCGF.EmitBlock(ToBB);
9253 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9254 MemberMapType,
9255 MapperCGF.Builder.getInt64(
9256 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9257 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9258 MapperCGF.Builder.CreateBr(EndBB);
9259 MapperCGF.EmitBlock(ToElseBB);
9260 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9261 LeftToFrom,
9262 MapperCGF.Builder.getInt64(
9263 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9264 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9265 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9266 // In case of from, clear OMP_MAP_TO.
9267 MapperCGF.EmitBlock(FromBB);
9268 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9269 MemberMapType,
9270 MapperCGF.Builder.getInt64(
9271 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9272 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9273 // In case of tofrom, do nothing.
9274 MapperCGF.EmitBlock(EndBB);
9275 LastBB = EndBB;
9276 llvm::PHINode *CurMapType =
9277 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9278 CurMapType->addIncoming(AllocMapType, AllocBB);
9279 CurMapType->addIncoming(ToMapType, ToBB);
9280 CurMapType->addIncoming(FromMapType, FromBB);
9281 CurMapType->addIncoming(MemberMapType, ToElseBB);
9282
9283 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9284 CurSizeArg, CurMapType, CurNameArg};
9285 if (Info.Mappers[I]) {
9286 // Call the corresponding mapper function.
9287 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9288 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9289 assert(MapperFunc && "Expect a valid mapper function is available.");
9290 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9291 } else {
9292 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9293 // data structure.
9294 MapperCGF.EmitRuntimeCall(
9295 OMPBuilder.getOrCreateRuntimeFunction(
9296 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9297 OffloadingArgs);
9298 }
9299 }
9300
9301 // Update the pointer to point to the next element that needs to be mapped,
9302 // and check whether we have mapped all elements.
9303 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9304 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9305 PtrPHI->addIncoming(PtrNext, LastBB);
9306 llvm::Value *IsDone =
9307 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9308 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9309 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9310
9311 MapperCGF.EmitBlock(ExitBB);
9312 // Emit array deletion if this is an array section and \p MapType indicates
9313 // that deletion is required.
9314 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9315 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9316
9317 // Emit the function exit block.
9318 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9319 MapperCGF.FinishFunction();
9320 UDMMap.try_emplace(D, Fn);
9321 if (CGF) {
9322 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9323 Decls.second.push_back(D);
9324 }
9325}
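// Illustrative source-level example (assumed input, not from this file): the
// function emitted above corresponds to a declaration such as
//
//   struct Vec { int len; double *data; };
//   #pragma omp declare mapper(Vec v) map(v.len) map(v.data[0:v.len])
//
// Each element of a mapped 'Vec' array section is privatized as the mapper
// variable 'v', and every map clause component becomes either a
// __tgt_push_mapper_component call or a nested mapper invocation inside the
// loop generated above.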
9326
9327/// Emit the array initialization or deletion portion for user-defined mapper
9328/// code generation. First, it evaluates whether an array section is mapped and
9329/// whether the \a MapType instructs to delete this section. If \a IsInit is
9330/// true, and \a MapType indicates to not delete this array, array
9331/// initialization code is generated. If \a IsInit is false, and \a MapType
9332 /// indicates to delete this array, array deletion code is generated.
9333 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9334 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9335 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9336 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9337 bool IsInit) {
9338 StringRef Prefix = IsInit ? ".init" : ".del";
9339
9340 // Evaluate if this is an array section.
9341 llvm::BasicBlock *BodyBB =
9342 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9343 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9344 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9345 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9346 MapType,
9347 MapperCGF.Builder.getInt64(
9348 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9349 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9350 llvm::Value *DeleteCond;
9351 llvm::Value *Cond;
9352 if (IsInit) {
9353 // base != begin?
9354 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9355 // IsPtrAndObj?
9356 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9357 MapType,
9358 MapperCGF.Builder.getInt64(
9359 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9360 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9361 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9362 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9363 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9364 DeleteCond = MapperCGF.Builder.CreateIsNull(
9365 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9366 } else {
9367 Cond = IsArray;
9368 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9369 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9370 }
9371 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9372 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9373
9374 MapperCGF.EmitBlock(BodyBB);
9375 // Get the array size by multiplying element size and element number (i.e., \p
9376 // Size).
9377 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9378 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9379 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9380 // memory allocation/deletion purpose only.
9381 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9382 MapType,
9383 MapperCGF.Builder.getInt64(
9384 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9385 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9386 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9387 MapTypeArg = MapperCGF.Builder.CreateOr(
9388 MapTypeArg,
9389 MapperCGF.Builder.getInt64(
9390 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9391 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9392
9393 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9394 // data structure.
9395 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9396 ArraySize, MapTypeArg, MapName};
9397 MapperCGF.EmitRuntimeCall(
9398 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9399 OMPRTL___tgt_push_mapper_component),
9400 OffloadingArgs);
9401}
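// Worked illustration (exposition only, names hypothetical): on the
// initialization path with Size == 8 elements, the guard and call emitted
// above reduce to
//
//   if ((8 > 1 || (base != begin && IsPtrAndObj)) && !IsDelete)
//     __tgt_push_mapper_component(handle, base, begin, 8 * sizeof(Ty),
//                                 (type & ~(TO | FROM)) | IMPLICIT, name);
//
// i.e. the TO/FROM bits are cleared so the runtime only allocates (or, on
// the deletion path, releases) the section without copying element data.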
9402
9403 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9404 const OMPDeclareMapperDecl *D) {
9405 auto I = UDMMap.find(D);
9406 if (I != UDMMap.end())
9407 return I->second;
9408 emitUserDefinedMapper(D);
9409 return UDMMap.lookup(D);
9410}
9411
9412 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9413 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9414 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9415 const OMPLoopDirective &D)>
9416 SizeEmitter) {
9417 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9418 const OMPExecutableDirective *TD = &D;
9419 // Get nested teams distribute kind directive, if any. For now, treat
9420 // 'target_teams_loop' as if it's really a target_teams_distribute.
9421 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9422 Kind != OMPD_target_teams_loop)
9423 TD = getNestedDistributeDirective(CGM.getContext(), D);
9424 if (!TD)
9425 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9426
9427 const auto *LD = cast<OMPLoopDirective>(TD);
9428 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9429 return NumIterations;
9430 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9431}
9432
9433static void
9434emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9435 const OMPExecutableDirective &D,
9436 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9437 bool RequiresOuterTask, const CapturedStmt &CS,
9438 bool OffloadingMandatory, CodeGenFunction &CGF) {
9439 if (OffloadingMandatory) {
9440 CGF.Builder.CreateUnreachable();
9441 } else {
9442 if (RequiresOuterTask) {
9443 CapturedVars.clear();
9444 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9445 }
9446 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9447 CapturedVars);
9448 }
9449}
9450
9451static llvm::Value *emitDeviceID(
9452 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9453 CodeGenFunction &CGF) {
9454 // Emit device ID if any.
9455 llvm::Value *DeviceID;
9456 if (Device.getPointer()) {
9457 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9458 Device.getInt() == OMPC_DEVICE_device_num) &&
9459 "Expected device_num modifier.");
9460 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9461 DeviceID =
9462 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9463 } else {
9464 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9465 }
9466 return DeviceID;
9467}
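// Illustrative example (not part of the original source): for
//
//   #pragma omp target device(2)
//   { /* ... */ }
//
// the clause expression is emitted and sign-extended to i64, so the launch
// receives a device ID of 2; without a device clause the sentinel
// OMP_DEVICEID_UNDEF (-1) is passed and the runtime picks the default device.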
9468
9469 static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9470 CodeGenFunction &CGF) {
9471 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9472
9473 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9474 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9475 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9476 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9477 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9478 /*isSigned=*/false);
9479 }
9480 return DynCGroupMem;
9481}
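// Illustrative example (not part of the original source): the LLVM extension
// clause below requests 1 KiB of dynamic per-team (contention group) memory
// for the kernel, which this helper lowers to the i32 constant 1024:
//
//   #pragma omp target ompx_dyn_cgroup_mem(1024)
//   { /* ... */ }
//
// Without the clause, the kernel-launch arguments carry 0.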
9482
9483 static void emitTargetCallKernelLaunch(
9484 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9485 const OMPExecutableDirective &D,
9486 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9487 const CapturedStmt &CS, bool OffloadingMandatory,
9488 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9489 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9490 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9491 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9492 const OMPLoopDirective &D)>
9493 SizeEmitter,
9494 CodeGenFunction &CGF, CodeGenModule &CGM) {
9495 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9496
9497 // Fill up the arrays with all the captured variables.
9498 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9499
9500 // Get mappable expression information.
9501 MappableExprsHandler MEHandler(D, CGF);
9502 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9503 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9504
9505 auto RI = CS.getCapturedRecordDecl()->field_begin();
9506 auto *CV = CapturedVars.begin();
9507 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9508 CE = CS.capture_end();
9509 CI != CE; ++CI, ++RI, ++CV) {
9510 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9511 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9512
9513 // VLA sizes are passed to the outlined region by copy and do not have map
9514 // information associated.
9515 if (CI->capturesVariableArrayType()) {
9516 CurInfo.Exprs.push_back(nullptr);
9517 CurInfo.BasePointers.push_back(*CV);
9518 CurInfo.DevicePtrDecls.push_back(nullptr);
9519 CurInfo.DevicePointers.push_back(
9520 MappableExprsHandler::DeviceInfoTy::None);
9521 CurInfo.Pointers.push_back(*CV);
9522 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9523 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9524 // Copy to the device as an argument. No need to retrieve it.
9525 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9526 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9527 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9528 CurInfo.Mappers.push_back(nullptr);
9529 } else {
9530 // If we have any information in the map clause, we use it, otherwise we
9531 // just do a default mapping.
9532 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9533 if (!CI->capturesThis())
9534 MappedVarSet.insert(CI->getCapturedVar());
9535 else
9536 MappedVarSet.insert(nullptr);
9537 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9538 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9539 // Generate correct mapping for variables captured by reference in
9540 // lambdas.
9541 if (CI->capturesVariable())
9542 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9543 CurInfo, LambdaPointers);
9544 }
9545 // We expect to have at least an element of information for this capture.
9546 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9547 "Non-existing map pointer for capture!");
9548 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9549 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9550 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9551 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9552 "Inconsistent map information sizes!");
9553
9554 // If there is an entry in PartialStruct it means we have a struct with
9555 // individual members mapped. Emit an extra combined entry.
9556 if (PartialStruct.Base.isValid()) {
9557 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9558 MEHandler.emitCombinedEntry(
9559 CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
9560 OMPBuilder, nullptr,
9561 !PartialStruct.PreliminaryMapData.BasePointers.empty());
9562 }
9563
9564 // We need to append the results of this capture to what we already have.
9565 CombinedInfo.append(CurInfo);
9566 }
9567 // Adjust MEMBER_OF flags for the lambda captures.
9568 MEHandler.adjustMemberOfForLambdaCaptures(
9569 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9570 CombinedInfo.Pointers, CombinedInfo.Types);
9571 // Map any list items in a map clause that were not captured because they
9572 // weren't referenced within the construct.
9573 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);
9574
9575 CGOpenMPRuntime::TargetDataInfo Info;
9576 // Fill up the arrays and create the arguments.
9577 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
9578 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
9579 llvm::codegenoptions::NoDebugInfo;
9580 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
9581 EmitDebug,
9582 /*ForEndCall=*/false);
9583
9584 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9585 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9586 CGF.VoidPtrTy, CGM.getPointerAlign());
9587 InputInfo.PointersArray =
9588 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9589 InputInfo.SizesArray =
9590 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9591 InputInfo.MappersArray =
9592 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9593 MapTypesArray = Info.RTArgs.MapTypesArray;
9594 MapNamesArray = Info.RTArgs.MapNamesArray;
9595
9596 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9597 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9598 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9599 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9600 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9601
9602 if (IsReverseOffloading) {
9603 // Reverse offloading is not supported, so just execute on the host.
9604 // FIXME: This fallback solution is incorrect since it ignores the
9605 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9606 // assert here and ensure SEMA emits an error.
9607 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9608 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9609 return;
9610 }
9611
9612 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9613 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9614
9615 llvm::Value *BasePointersArray =
9616 InputInfo.BasePointersArray.emitRawPointer(CGF);
9617 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9618 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9619 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9620
9621 auto &&EmitTargetCallFallbackCB =
9622 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9623 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9624 -> llvm::OpenMPIRBuilder::InsertPointTy {
9625 CGF.Builder.restoreIP(IP);
9626 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9627 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9628 return CGF.Builder.saveIP();
9629 };
9630
9631 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9632 llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
9633 llvm::Value *NumThreads =
9634 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
9635 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9636 llvm::Value *NumIterations =
9637 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9638 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9639 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9640 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9641
9642 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9643 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9644 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9645
9646 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9647 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9648 DynCGGroupMem, HasNoWait);
9649
9650 CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9651 CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
9652 DeviceID, RTLoc, AllocaIP));
9653 };
9654
9655 if (RequiresOuterTask)
9656 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9657 else
9658 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9659}
9660
9661static void
9662emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9663 const OMPExecutableDirective &D,
9664 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9665 bool RequiresOuterTask, const CapturedStmt &CS,
9666 bool OffloadingMandatory, CodeGenFunction &CGF) {
9667
9668 // Notify that the host version must be executed.
9669 auto &&ElseGen =
9670 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9671 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9672 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9673 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9674 };
9675
9676 if (RequiresOuterTask) {
9677 CodeGenFunction::OMPTargetDataInfo InputInfo;
9678 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9679 } else {
9680 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9681 }
9682}
9683
9684 void CGOpenMPRuntime::emitTargetCall(
9685 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9686 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9687 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9688 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9689 const OMPLoopDirective &D)>
9690 SizeEmitter) {
9691 if (!CGF.HaveInsertPoint())
9692 return;
9693
9694 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9695 CGM.getLangOpts().OpenMPOffloadMandatory;
9696
9697 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9698
9699 const bool RequiresOuterTask =
9700 D.hasClausesOfKind<OMPDependClause>() ||
9701 D.hasClausesOfKind<OMPNowaitClause>() ||
9702 D.hasClausesOfKind<OMPInReductionClause>() ||
9703 (CGM.getLangOpts().OpenMP >= 51 &&
9704 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9705 D.hasClausesOfKind<OMPThreadLimitClause>());
9706 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9707 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9708 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9709 PrePostActionTy &) {
9710 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9711 };
9712 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9713
9714 CodeGenFunction::OMPTargetDataInfo InputInfo;
9715 llvm::Value *MapTypesArray = nullptr;
9716 llvm::Value *MapNamesArray = nullptr;
9717
9718 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9719 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9720 OutlinedFnID, &InputInfo, &MapTypesArray,
9721 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9722 PrePostActionTy &) {
9723 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9724 RequiresOuterTask, CS, OffloadingMandatory,
9725 Device, OutlinedFnID, InputInfo, MapTypesArray,
9726 MapNamesArray, SizeEmitter, CGF, CGM);
9727 };
9728
9729 auto &&TargetElseGen =
9730 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9731 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9732 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9733 CS, OffloadingMandatory, CGF);
9734 };
9735
9736 // If we have a target function ID it means that we need to support
9737 // offloading, otherwise, just execute on the host. We need to execute on host
9738 // regardless of the conditional in the if clause if, e.g., the user does not
9739 // specify target triples.
9740 if (OutlinedFnID) {
9741 if (IfCond) {
9742 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9743 } else {
9744 RegionCodeGenTy ThenRCG(TargetThenGen);
9745 ThenRCG(CGF);
9746 }
9747 } else {
9748 RegionCodeGenTy ElseRCG(TargetElseGen);
9749 ElseRCG(CGF);
9750 }
9751}
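// Illustrative example (not part of the original source): for
//
//   #pragma omp target if(use_gpu) device(dev) map(tofrom : a[0:n])
//   for (int i = 0; i < n; ++i)
//     a[i] += 1;
//
// TargetThenGen above emits the offloading kernel launch (with the host
// fallback callback for failed launches), TargetElseGen emits the plain host
// call, and emitIfClause selects between them on the runtime value of
// 'use_gpu'.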
9752
9753 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9754 StringRef ParentName) {
9755 if (!S)
9756 return;
9757
9758 // Codegen OMP target directives that offload compute to the device.
9759 bool RequiresDeviceCodegen =
9760 isa<OMPExecutableDirective>(S) &&
9761 isOpenMPTargetExecutionDirective(
9762 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9763
9764 if (RequiresDeviceCodegen) {
9765 const auto &E = *cast<OMPExecutableDirective>(S);
9766
9767 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9768 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9769
9770 // Is this a target region that should not be emitted as an entry point? If
9771 // so just signal we are done with this target region.
9772 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9773 return;
9774
9775 switch (E.getDirectiveKind()) {
9776 case OMPD_target:
9777 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9778 cast<OMPTargetDirective>(E));
9779 break;
9780 case OMPD_target_parallel:
9781 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9782 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9783 break;
9784 case OMPD_target_teams:
9785 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9786 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9787 break;
9788 case OMPD_target_teams_distribute:
9789 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9790 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9791 break;
9792 case OMPD_target_teams_distribute_simd:
9793 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9794 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9795 break;
9796 case OMPD_target_parallel_for:
9797 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9798 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9799 break;
9800 case OMPD_target_parallel_for_simd:
9801 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9802 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9803 break;
9804 case OMPD_target_simd:
9805 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9806 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9807 break;
9808 case OMPD_target_teams_distribute_parallel_for:
9809 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9810 CGM, ParentName,
9811 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9812 break;
9813 case OMPD_target_teams_distribute_parallel_for_simd:
9814 CodeGenFunction::
9815 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9816 CGM, ParentName,
9817 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9818 break;
9819 case OMPD_target_teams_loop:
9820 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9821 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9822 break;
9823 case OMPD_target_parallel_loop:
9824 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9825 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9826 break;
9827 case OMPD_parallel:
9828 case OMPD_for:
9829 case OMPD_parallel_for:
9830 case OMPD_parallel_master:
9831 case OMPD_parallel_sections:
9832 case OMPD_for_simd:
9833 case OMPD_parallel_for_simd:
9834 case OMPD_cancel:
9835 case OMPD_cancellation_point:
9836 case OMPD_ordered:
9837 case OMPD_threadprivate:
9838 case OMPD_allocate:
9839 case OMPD_task:
9840 case OMPD_simd:
9841 case OMPD_tile:
9842 case OMPD_unroll:
9843 case OMPD_sections:
9844 case OMPD_section:
9845 case OMPD_single:
9846 case OMPD_master:
9847 case OMPD_critical:
9848 case OMPD_taskyield:
9849 case OMPD_barrier:
9850 case OMPD_taskwait:
9851 case OMPD_taskgroup:
9852 case OMPD_atomic:
9853 case OMPD_flush:
9854 case OMPD_depobj:
9855 case OMPD_scan:
9856 case OMPD_teams:
9857 case OMPD_target_data:
9858 case OMPD_target_exit_data:
9859 case OMPD_target_enter_data:
9860 case OMPD_distribute:
9861 case OMPD_distribute_simd:
9862 case OMPD_distribute_parallel_for:
9863 case OMPD_distribute_parallel_for_simd:
9864 case OMPD_teams_distribute:
9865 case OMPD_teams_distribute_simd:
9866 case OMPD_teams_distribute_parallel_for:
9867 case OMPD_teams_distribute_parallel_for_simd:
9868 case OMPD_target_update:
9869 case OMPD_declare_simd:
9870 case OMPD_declare_variant:
9871 case OMPD_begin_declare_variant:
9872 case OMPD_end_declare_variant:
9873 case OMPD_declare_target:
9874 case OMPD_end_declare_target:
9875 case OMPD_declare_reduction:
9876 case OMPD_declare_mapper:
9877 case OMPD_taskloop:
9878 case OMPD_taskloop_simd:
9879 case OMPD_master_taskloop:
9880 case OMPD_master_taskloop_simd:
9881 case OMPD_parallel_master_taskloop:
9882 case OMPD_parallel_master_taskloop_simd:
9883 case OMPD_requires:
9884 case OMPD_metadirective:
9885 case OMPD_unknown:
9886 default:
9887 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9888 }
9889 return;
9890 }
9891
9892 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9893 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9894 return;
9895
9896 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9897 return;
9898 }
9899
9900 // If this is a lambda function, look into its body.
9901 if (const auto *L = dyn_cast<LambdaExpr>(S))
9902 S = L->getBody();
9903
9904 // Keep looking for target regions recursively.
9905 for (const Stmt *II : S->children())
9906 scanForTargetRegionsFunctions(II, ParentName);
9907}
9908
9909static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9910 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9911 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9912 if (!DevTy)
9913 return false;
9914 // Do not emit device_type(nohost) functions for the host.
9915 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9916 return true;
9917 // Do not emit device_type(host) functions for the device.
9918 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9919 return true;
9920 return false;
9921}
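// Illustrative example (not part of the original source): given
//
//   #pragma omp declare target device_type(nohost)
//   void gpu_only(void);
//   #pragma omp end declare target
//
// 'gpu_only' is assumed not to be emitted during host compilation (first
// check above), while a device_type(host) function is skipped in device
// compilation (second check).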
9922
9923 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9924 // If emitting code for the host, we do not process FD here. Instead we do
9925 // the normal code generation.
9926 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9927 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9928 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9929 CGM.getLangOpts().OpenMPIsTargetDevice))
9930 return true;
9931 return false;
9932 }
9933
9934 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9935 // Try to detect target regions in the function.
9936 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9937 StringRef Name = CGM.getMangledName(GD);
9938 scanForTargetRegionsFunctions(FD->getBody(), Name);
9939 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9940 CGM.getLangOpts().OpenMPIsTargetDevice))
9941 return true;
9942 }
9943
9944 // Do not emit the function if it is not marked as declare target.
9945 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9946 AlreadyEmittedTargetDecls.count(VD) == 0;
9947}
9948
9949 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
9950 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
9951 CGM.getLangOpts().OpenMPIsTargetDevice))
9952 return true;
9953
9954 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9955 return false;
9956
9957 // Check if there are Ctors/Dtors in this declaration and look for target
9958 // regions in it. We use the complete variant to produce the kernel name
9959 // mangling.
9960 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9961 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9962 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9963 StringRef ParentName =
9964 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
9965 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9966 }
9967 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9968 StringRef ParentName =
9969 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
9970 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9971 }
9972 }
9973
9974 // Do not emit the variable if it is not marked as declare target.
9975 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9976 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9977 cast<VarDecl>(GD.getDecl()));
9978 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9979 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
9980 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
9981 HasRequiresUnifiedSharedMemory)) {
9982 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9983 return true;
9984 }
9985 return false;
9986}
9987
9988 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
9989 llvm::Constant *Addr) {
9990 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
9991 !CGM.getLangOpts().OpenMPIsTargetDevice)
9992 return;
9993
9994 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9995 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9996
9997 // If this is an 'extern' declaration we defer to the canonical definition and
9998 // do not emit an offloading entry.
9999 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10000 VD->hasExternalStorage())
10001 return;
10002
10003 if (!Res) {
10004 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10005 // Register non-target variables being emitted in device code (debug info
10006 // may cause this).
10007 StringRef VarName = CGM.getMangledName(VD);
10008 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10009 }
10010 return;
10011 }
10012
10013 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10014 auto LinkageForVariable = [&VD, this]() {
10015 return CGM.getLLVMLinkageVarDefinition(VD);
10016 };
10017
10018 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10019 OMPBuilder.registerTargetGlobalVariable(
10020 convertCaptureClause(VD), convertDeviceClause(VD),
10021 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10022 VD->isExternallyVisible(),
10023 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10024 VD->getCanonicalDecl()->getBeginLoc()),
10025 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10026 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10027 CGM.getTypes().ConvertTypeForMem(
10028 CGM.getContext().getPointerType(VD->getType())),
10029 Addr);
10030
10031 for (auto *ref : GeneratedRefs)
10032 CGM.addCompilerUsedGlobal(ref);
10033}
10034
10035 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10036 if (isa<FunctionDecl>(GD.getDecl()) ||
10037 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10038 return emitTargetFunctions(GD);
10039
10040 return emitTargetGlobalVariable(GD);
10041}
10042
10043 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10044 for (const VarDecl *VD : DeferredGlobalVariables) {
10045 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10046 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10047 if (!Res)
10048 continue;
10049 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10050 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10051 !HasRequiresUnifiedSharedMemory) {
10052 CGM.EmitGlobal(VD);
10053 } else {
10054 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10055 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10056 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10057 HasRequiresUnifiedSharedMemory)) &&
10058 "Expected link clause or to clause with unified memory.");
10060 }
10061 }
10062}
10063
10064 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10065 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10066 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10067 " Expected target-based directive.");
10068}
10069
10070 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10071 for (const OMPClause *Clause : D->clauselists()) {
10072 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10073 HasRequiresUnifiedSharedMemory = true;
10074 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10075 } else if (const auto *AC =
10076 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10077 switch (AC->getAtomicDefaultMemOrderKind()) {
10078 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10079 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10080 break;
10081 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10082 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10083 break;
10084 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10085 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10086 break;
10087 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10088 break;
10089 }
10090 }
10091 }
10092}
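// Illustrative example (not part of the original source): a translation unit
// containing
//
//   #pragma omp requires unified_shared_memory atomic_default_mem_order(seq_cst)
//
// sets HasRequiresUnifiedSharedMemory and makes atomic constructs that lack
// an explicit memory-order clause default to sequentially consistent
// ordering.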
10093
10094llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10095 return RequiresAtomicOrdering;
10096}
10097
10098 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10099 LangAS &AS) {
10100 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10101 return false;
10102 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10103 switch (A->getAllocatorType()) {
10104 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10105 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10106 // Not supported, fallback to the default mem space.
10107 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10108 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10109 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10110 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10111 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10112 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10113 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10114 AS = LangAS::Default;
10115 return true;
10116 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10117 llvm_unreachable("Expected predefined allocator for the variables with the "
10118 "static storage.");
10119 }
10120 return false;
10121}
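// Illustrative example (not part of the original source): for a global with
// a predefined allocator, such as
//
//   int buf[64];
//   #pragma omp allocate(buf) allocator(omp_high_bw_mem_alloc)
//
// the OMPHighBWMemAlloc case above returns true with AS = LangAS::Default,
// i.e. the allocator is accepted but no special address space is assigned on
// this path.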
10122
10123 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10124 return HasRequiresUnifiedSharedMemory;
10125}
10126
10127 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10128 CodeGenModule &CGM)
10129 : CGM(CGM) {
10130 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10131 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10132 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10133 }
10134}
10135
10136 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10137 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10138 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10139}
10140
10141 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10142 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10143 return true;
10144
10145 const auto *D = cast<FunctionDecl>(GD.getDecl());
10146 // Do not emit the function if it is marked as declare target, as it was
10147 // already emitted.
10148 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10149 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10150 if (auto *F = dyn_cast_or_null<llvm::Function>(
10151 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10152 return !F->isDeclaration();
10153 return false;
10154 }
10155 return true;
10156 }
10157
10158 return !AlreadyEmittedTargetDecls.insert(D).second;
10159}
10160
10161 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10162 const OMPExecutableDirective &D,
10163 SourceLocation Loc,
10164 llvm::Function *OutlinedFn,
10165 ArrayRef<llvm::Value *> CapturedVars) {
10166 if (!CGF.HaveInsertPoint())
10167 return;
10168
10169 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10170 CodeGenFunction::RunCleanupsScope Scope(CGF);
10171
10172 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10173 llvm::Value *Args[] = {
10174 RTLoc,
10175 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10176 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10177 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10178 RealArgs.append(std::begin(Args), std::end(Args));
10179 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10180
10181 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10182 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10183 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10184}
10185
10186 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10187 const Expr *NumTeams,
10188 const Expr *ThreadLimit,
10189 SourceLocation Loc) {
10190 if (!CGF.HaveInsertPoint())
10191 return;
10192
10193 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10194
10195 llvm::Value *NumTeamsVal =
10196 NumTeams
10197 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10198 CGF.CGM.Int32Ty, /* isSigned = */ true)
10199 : CGF.Builder.getInt32(0);
10200
10201 llvm::Value *ThreadLimitVal =
10202 ThreadLimit
10203 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10204 CGF.CGM.Int32Ty, /* isSigned = */ true)
10205 : CGF.Builder.getInt32(0);
10206
10207 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10208 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10209 ThreadLimitVal};
10210 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10211 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10212 PushNumTeamsArgs);
10213}
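// Illustrative example (not part of the original source): for
//
//   #pragma omp teams num_teams(8) thread_limit(128)
//
// the helper above emits, roughly,
//
//   __kmpc_push_num_teams(&loc, global_tid, 8, 128);
//
// and an absent clause contributes 0, letting the runtime choose a default.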
10214
10215 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10216 const Expr *ThreadLimit,
10217 SourceLocation Loc) {
10218 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10219 llvm::Value *ThreadLimitVal =
10220 ThreadLimit
10221 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10222 CGF.CGM.Int32Ty, /* isSigned = */ true)
10223 : CGF.Builder.getInt32(0);
10224
10225 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10226 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10227 ThreadLimitVal};
10228 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10229 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10230 ThreadLimitArgs);
10231}
10232
10233 void CGOpenMPRuntime::emitTargetDataCalls(
10234 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10235 const Expr *Device, const RegionCodeGenTy &CodeGen,
10236 CGOpenMPRuntime::TargetDataInfo &Info) {
10237 if (!CGF.HaveInsertPoint())
10238 return;
10239
10240 // Action used to replace the default codegen action and turn privatization
10241 // off.
10242 PrePostActionTy NoPrivAction;
10243
10244 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10245
10246 llvm::Value *IfCondVal = nullptr;
10247 if (IfCond)
10248 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10249
10250 // Emit device ID if any.
10251 llvm::Value *DeviceID = nullptr;
10252 if (Device) {
10253 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10254 CGF.Int64Ty, /*isSigned=*/true);
10255 } else {
10256 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10257 }
10258
10259 // Fill up the arrays with all the mapped variables.
10260 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10261 auto GenMapInfoCB =
10262 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10263 CGF.Builder.restoreIP(CodeGenIP);
10264 // Get map clause information.
10265 MappableExprsHandler MEHandler(D, CGF);
10266 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10267
10268 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10269 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10270 };
10271 if (CGM.getCodeGenOpts().getDebugInfo() !=
10272 llvm::codegenoptions::NoDebugInfo) {
10273 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10274 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10275 FillInfoMap);
10276 }
10277
10278 return CombinedInfo;
10279 };
10280 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10281 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10282 CGF.Builder.restoreIP(CodeGenIP);
10283 switch (BodyGenType) {
10284 case BodyGenTy::Priv:
10285 if (!Info.CaptureDeviceAddrMap.empty())
10286 CodeGen(CGF);
10287 break;
10288 case BodyGenTy::DupNoPriv:
10289 if (!Info.CaptureDeviceAddrMap.empty()) {
10290 CodeGen.setAction(NoPrivAction);
10291 CodeGen(CGF);
10292 }
10293 break;
10294 case BodyGenTy::NoPriv:
10295 if (Info.CaptureDeviceAddrMap.empty()) {
10296 CodeGen.setAction(NoPrivAction);
10297 CodeGen(CGF);
10298 }
10299 break;
10300 }
10301 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10302 CGF.Builder.GetInsertPoint());
10303 };
10304
10305 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10306 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10307 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10308 }
10309 };
10310
10311 auto CustomMapperCB = [&](unsigned int I) {
10312 llvm::Value *MFunc = nullptr;
10313 if (CombinedInfo.Mappers[I]) {
10314 Info.HasMapper = true;
10315 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10316 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10317 }
10318 return MFunc;
10319 };
10320
10321 // Source location for the ident struct
10322 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10323
10324 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10325 CGF.AllocaInsertPt->getIterator());
10326 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10327 CGF.Builder.GetInsertPoint());
10328 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10329 CGF.Builder.restoreIP(OMPBuilder.createTargetData(
10330 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10331 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10332}
10333
10334 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10335 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10336 const Expr *Device) {
10337 if (!CGF.HaveInsertPoint())
10338 return;
10339
10340 assert((isa<OMPTargetEnterDataDirective>(D) ||
10341 isa<OMPTargetExitDataDirective>(D) ||
10342 isa<OMPTargetUpdateDirective>(D)) &&
10343 "Expecting either target enter, exit data, or update directives.");
10344
10345 CodeGenFunction::OMPTargetDataInfo InputInfo;
10346 llvm::Value *MapTypesArray = nullptr;
10347 llvm::Value *MapNamesArray = nullptr;
10348 // Generate the code for the opening of the data environment.
10349 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10350 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10351 // Emit device ID if any.
10352 llvm::Value *DeviceID = nullptr;
10353 if (Device) {
10354 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10355 CGF.Int64Ty, /*isSigned=*/true);
10356 } else {
10357 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10358 }
10359
10360 // Emit the number of elements in the offloading arrays.
10361 llvm::Constant *PointerNum =
10362 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10363
10364 // Source location for the ident struct
10365 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10366
10367 SmallVector<llvm::Value *, 13> OffloadingArgs(
10368 {RTLoc, DeviceID, PointerNum,
10369 InputInfo.BasePointersArray.emitRawPointer(CGF),
10370 InputInfo.PointersArray.emitRawPointer(CGF),
10371 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10372 InputInfo.MappersArray.emitRawPointer(CGF)});
10373
10374 // Select the right runtime function call for each standalone
10375 // directive.
10376 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10377 RuntimeFunction RTLFn;
10378 switch (D.getDirectiveKind()) {
10379 case OMPD_target_enter_data:
10380 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10381 : OMPRTL___tgt_target_data_begin_mapper;
10382 break;
10383 case OMPD_target_exit_data:
10384 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10385 : OMPRTL___tgt_target_data_end_mapper;
10386 break;
10387 case OMPD_target_update:
10388 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10389 : OMPRTL___tgt_target_data_update_mapper;
10390 break;
10391 case OMPD_parallel:
10392 case OMPD_for:
10393 case OMPD_parallel_for:
10394 case OMPD_parallel_master:
10395 case OMPD_parallel_sections:
10396 case OMPD_for_simd:
10397 case OMPD_parallel_for_simd:
10398 case OMPD_cancel:
10399 case OMPD_cancellation_point:
10400 case OMPD_ordered:
10401 case OMPD_threadprivate:
10402 case OMPD_allocate:
10403 case OMPD_task:
10404 case OMPD_simd:
10405 case OMPD_tile:
10406 case OMPD_unroll:
10407 case OMPD_sections:
10408 case OMPD_section:
10409 case OMPD_single:
10410 case OMPD_master:
10411 case OMPD_critical:
10412 case OMPD_taskyield:
10413 case OMPD_barrier:
10414 case OMPD_taskwait:
10415 case OMPD_taskgroup:
10416 case OMPD_atomic:
10417 case OMPD_flush:
10418 case OMPD_depobj:
10419 case OMPD_scan:
10420 case OMPD_teams:
10421 case OMPD_target_data:
10422 case OMPD_distribute:
10423 case OMPD_distribute_simd:
10424 case OMPD_distribute_parallel_for:
10425 case OMPD_distribute_parallel_for_simd:
10426 case OMPD_teams_distribute:
10427 case OMPD_teams_distribute_simd:
10428 case OMPD_teams_distribute_parallel_for:
10429 case OMPD_teams_distribute_parallel_for_simd:
10430 case OMPD_declare_simd:
10431 case OMPD_declare_variant:
10432 case OMPD_begin_declare_variant:
10433 case OMPD_end_declare_variant:
10434 case OMPD_declare_target:
10435 case OMPD_end_declare_target:
10436 case OMPD_declare_reduction:
10437 case OMPD_declare_mapper:
10438 case OMPD_taskloop:
10439 case OMPD_taskloop_simd:
10440 case OMPD_master_taskloop:
10441 case OMPD_master_taskloop_simd:
10442 case OMPD_parallel_master_taskloop:
10443 case OMPD_parallel_master_taskloop_simd:
10444 case OMPD_target:
10445 case OMPD_target_simd:
10446 case OMPD_target_teams_distribute:
10447 case OMPD_target_teams_distribute_simd:
10448 case OMPD_target_teams_distribute_parallel_for:
10449 case OMPD_target_teams_distribute_parallel_for_simd:
10450 case OMPD_target_teams:
10451 case OMPD_target_parallel:
10452 case OMPD_target_parallel_for:
10453 case OMPD_target_parallel_for_simd:
10454 case OMPD_requires:
10455 case OMPD_metadirective:
10456 case OMPD_unknown:
10457 default:
10458 llvm_unreachable("Unexpected standalone target data directive.");
10459 break;
10460 }
10461 if (HasNowait) {
10462 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10463 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10464 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10465 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10466 }
10467 CGF.EmitRuntimeCall(
10468 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10469 OffloadingArgs);
10470 };
10471
10472 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10473 &MapNamesArray](CodeGenFunction &CGF,
10474 PrePostActionTy &) {
10475 // Fill up the arrays with all the mapped variables.
10476 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10477
10478 // Get map clause information.
10479 MappableExprsHandler MEHandler(D, CGF);
10480 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10481
10482 CGOpenMPRuntime::TargetDataInfo Info;
10483 // Fill up the arrays and create the arguments.
10484 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10485 /*IsNonContiguous=*/true);
10486 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10487 D.hasClausesOfKind<OMPNowaitClause>();
10488 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10489 llvm::codegenoptions::NoDebugInfo;
10490 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10491 EmitDebug,
10492 /*ForEndCall=*/false);
10493 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10494 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10495 CGF.VoidPtrTy, CGM.getPointerAlign());
10496 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10497 CGM.getPointerAlign());
10498 InputInfo.SizesArray =
10499 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10500 InputInfo.MappersArray =
10501 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10502 MapTypesArray = Info.RTArgs.MapTypesArray;
10503 MapNamesArray = Info.RTArgs.MapNamesArray;
10504 if (RequiresOuterTask)
10505 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10506 else
10507 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10508 };
10509
10510 if (IfCond) {
10511 emitIfClause(CGF, IfCond, TargetThenGen,
10512 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10513 } else {
10514 RegionCodeGenTy ThenRCG(TargetThenGen);
10515 ThenRCG(CGF);
10516 }
10517}
10518
10519namespace {
10520 /// Kind of parameter in a function with 'declare simd' directive.
10521enum ParamKindTy {
10522 Linear,
10523 LinearRef,
10524 LinearUVal,
10525 LinearVal,
10526 Uniform,
10527 Vector,
10528};
10529/// Attribute set of the parameter.
10530struct ParamAttrTy {
10531 ParamKindTy Kind = Vector;
10532 llvm::APSInt StrideOrArg;
10533 llvm::APSInt Alignment;
10534 bool HasVarStride = false;
10535};
10536} // namespace
10537
10538static unsigned evaluateCDTSize(const FunctionDecl *FD,
10539 ArrayRef<ParamAttrTy> ParamAttrs) {
10540 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10541 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
10542 // argument of that clause. The VLEN value must be a power of 2.
10543 // Otherwise, the notion of the function's "characteristic data type" (CDT)
10544 // is used to compute the vector length.
10545 // The CDT is defined in the following order:
10546 // a) For a non-void function, the CDT is the return type.
10547 // b) If the function has any non-uniform, non-linear parameters, then the
10548 // CDT is the type of the first such parameter.
10549 // c) If the CDT determined by a) or b) above is a struct, union, or class
10550 // type that is passed by value (except for a type that maps to a
10551 // built-in complex data type), the characteristic data type is int.
10552 // d) If none of the above three cases is applicable, the CDT is int.
10553 // The VLEN is then determined based on the CDT and the size of the vector
10554 // register of the ISA for which the current vector version is generated.
10555 // The VLEN is computed using the formula below:
10556 // VLEN = sizeof(vector_register) / sizeof(CDT),
10557 // where the vector register size is specified in section 3.2.1 (Registers
10558 // and the Stack Frame) of the original AMD64 ABI document.
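// Editorial illustration (not part of the original source): for
//   #pragma omp declare simd
//   double foo(double x);
// the CDT is 'double' (64 bits), so an AVX variant with 256-bit vector
// registers gets VLEN = 256 / 64 = 4.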
10559 QualType RetType = FD->getReturnType();
10560 if (RetType.isNull())
10561 return 0;
10562 ASTContext &C = FD->getASTContext();
10563 QualType CDT;
10564 if (!RetType.isNull() && !RetType->isVoidType()) {
10565 CDT = RetType;
10566 } else {
10567 unsigned Offset = 0;
10568 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10569 if (ParamAttrs[Offset].Kind == Vector)
10570 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10571 ++Offset;
10572 }
10573 if (CDT.isNull()) {
10574 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10575 if (ParamAttrs[I + Offset].Kind == Vector) {
10576 CDT = FD->getParamDecl(I)->getType();
10577 break;
10578 }
10579 }
10580 }
10581 }
10582 if (CDT.isNull())
10583 CDT = C.IntTy;
10584 CDT = CDT->getCanonicalTypeUnqualified();
10585 if (CDT->isRecordType() || CDT->isUnionType())
10586 CDT = C.IntTy;
10587 return C.getTypeSize(CDT);
10588}
10589
10590 /// Mangle the parameter part of the vector function name according to the
10591 /// parameters' OpenMP classification. The mangling function is defined in
10592/// section 4.5 of the AAVFABI(2021Q1).
10593static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10594 SmallString<256> Buffer;
10595 llvm::raw_svector_ostream Out(Buffer);
10596 for (const auto &ParamAttr : ParamAttrs) {
10597 switch (ParamAttr.Kind) {
10598 case Linear:
10599 Out << 'l';
10600 break;
10601 case LinearRef:
10602 Out << 'R';
10603 break;
10604 case LinearUVal:
10605 Out << 'U';
10606 break;
10607 case LinearVal:
10608 Out << 'L';
10609 break;
10610 case Uniform:
10611 Out << 'u';
10612 break;
10613 case Vector:
10614 Out << 'v';
10615 break;
10616 }
10617 if (ParamAttr.HasVarStride)
10618 Out << "s" << ParamAttr.StrideOrArg;
10619 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10620 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10621 // Don't print the step value if it is not present or if it is
10622 // equal to 1.
10623 if (ParamAttr.StrideOrArg < 0)
10624 Out << 'n' << -ParamAttr.StrideOrArg;
10625 else if (ParamAttr.StrideOrArg != 1)
10626 Out << ParamAttr.StrideOrArg;
10627 }
10628
10629 if (!!ParamAttr.Alignment)
10630 Out << 'a' << ParamAttr.Alignment;
10631 }
10632
10633 return std::string(Out.str());
10634}
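// Editorial illustration (not part of the original source): for
//   #pragma omp declare simd uniform(a) linear(b:2)
//   void foo(int *a, int b, float c);
// the parameter sequence mangles to "ul2v": 'u' for the uniform 'a', 'l2'
// for the linear 'b' with constant step 2, and 'v' for the vector 'c'.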
10635
10636static void
10637emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10638 const llvm::APSInt &VLENVal,
10639 ArrayRef<ParamAttrTy> ParamAttrs,
10640 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10641 struct ISADataTy {
10642 char ISA;
10643 unsigned VecRegSize;
10644 };
10645 ISADataTy ISAData[] = {
10646 {
10647 'b', 128
10648 }, // SSE
10649 {
10650 'c', 256
10651 }, // AVX
10652 {
10653 'd', 256
10654 }, // AVX2
10655 {
10656 'e', 512
10657 }, // AVX512
10658 };
10659 llvm::SmallVector<char, 2> Masked;
10660 switch (State) {
10661 case OMPDeclareSimdDeclAttr::BS_Undefined:
10662 Masked.push_back('N');
10663 Masked.push_back('M');
10664 break;
10665 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10666 Masked.push_back('N');
10667 break;
10668 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10669 Masked.push_back('M');
10670 break;
10671 }
10672 for (char Mask : Masked) {
10673 for (const ISADataTy &Data : ISAData) {
10674 SmallString<256> Buffer;
10675 llvm::raw_svector_ostream Out(Buffer);
10676 Out << "_ZGV" << Data.ISA << Mask;
10677 if (!VLENVal) {
10678 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10679 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10680 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10681 } else {
10682 Out << VLENVal;
10683 }
10684 Out << mangleVectorParameters(ParamAttrs);
10685 Out << '_' << Fn->getName();
10686 Fn->addFnAttr(Out.str());
10687 }
10688 }
10689}
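// Editorial illustration (not part of the original source): for
// 'double foo(double x)' without a simdlen clause, the CDT size is 64 bits,
// so the SSE variant ('b', 128-bit) yields the attribute "_ZGVbN2v_foo" and
// the AVX variant ('c', 256-bit) yields "_ZGVcN4v_foo"; with BS_Undefined,
// both unmasked ('N') and masked ('M') names are added for each ISA.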
10690
10691 // These are the functions needed to mangle the names of the
10692 // vector functions generated by the compiler, according to the rules
10693// defined in the "Vector Function ABI specifications for AArch64",
10694// available at
10695// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10696
10697/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10698static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10699 QT = QT.getCanonicalType();
10700
10701 if (QT->isVoidType())
10702 return false;
10703
10704 if (Kind == ParamKindTy::Uniform)
10705 return false;
10706
10707 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10708 return false;
10709
10710 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10711 !QT->isReferenceType())
10712 return false;
10713
10714 return true;
10715}
10716
10717/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10718 static bool getAArch64PBV(QualType QT, ASTContext &C) {
10719 QT = QT.getCanonicalType();
10720 unsigned Size = C.getTypeSize(QT);
10721
10722 // Only scalars and complex types at most 16 bytes wide set PBV to true.
10723 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10724 return false;
10725
10726 if (QT->isFloatingType())
10727 return true;
10728
10729 if (QT->isIntegerType())
10730 return true;
10731
10732 if (QT->isPointerType())
10733 return true;
10734
10735 // TODO: Add support for complex types (section 3.1.2, item 2).
10736
10737 return false;
10738}
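// Editorial illustration (not part of the original source): 'int' (32 bits),
// 'double' (64 bits), and pointers are PBV; aggregates and types wider than
// 128 bits are not.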
10739
10740/// Computes the lane size (LS) of a return type or of an input parameter,
10741/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10742/// TODO: Add support for references, section 3.2.1, item 1.
10743static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10744 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10745 QualType PTy = QT.getCanonicalType()->getPointeeType();
10746 if (getAArch64PBV(PTy, C))
10747 return C.getTypeSize(PTy);
10748 }
10749 if (getAArch64PBV(QT, C))
10750 return C.getTypeSize(QT);
10751
10752 return C.getTypeSize(C.getUIntPtrType());
10753}
10754
10755// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10756// signature of the scalar function, as defined in 3.2.2 of the
10757// AAVFABI.
10758static std::tuple<unsigned, unsigned, bool>
10759 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10760 QualType RetType = FD->getReturnType().getCanonicalType();
10761
10762 ASTContext &C = FD->getASTContext();
10763
10764 bool OutputBecomesInput = false;
10765
10767 if (!RetType->isVoidType()) {
10768 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10769 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10770 OutputBecomesInput = true;
10771 }
10772 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10773 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10774 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10775 }
10776
10777 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10778 // The LS of a function parameter / return value can only be a power
10779 // of 2, starting from 8 bits, up to 128.
10780 assert(llvm::all_of(Sizes,
10781 [](unsigned Size) {
10782 return Size == 8 || Size == 16 || Size == 32 ||
10783 Size == 64 || Size == 128;
10784 }) &&
10785 "Invalid size");
10786
10787 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10788 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10789 OutputBecomesInput);
10790}
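// Editorial illustration (not part of the original source): for
// 'double foo(float x, double y)' the lane sizes are {64, 32, 64}, so
// NDS = 32 and WDS = 64.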
10791
10792// Function used to add the attribute. The parameter `VLEN` is
10793// templated to allow the use of "x" when targeting scalable functions
10794// for SVE.
10795template <typename T>
10796static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10797 char ISA, StringRef ParSeq,
10798 StringRef MangledName, bool OutputBecomesInput,
10799 llvm::Function *Fn) {
10800 SmallString<256> Buffer;
10801 llvm::raw_svector_ostream Out(Buffer);
10802 Out << Prefix << ISA << LMask << VLEN;
10803 if (OutputBecomesInput)
10804 Out << "v";
10805 Out << ParSeq << "_" << MangledName;
10806 Fn->addFnAttr(Out.str());
10807}
10808
10809// Helper function to generate the Advanced SIMD names depending on
10810// the value of the NDS when simdlen is not present.
10811static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10812 StringRef Prefix, char ISA,
10813 StringRef ParSeq, StringRef MangledName,
10814 bool OutputBecomesInput,
10815 llvm::Function *Fn) {
10816 switch (NDS) {
10817 case 8:
10818 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10819 OutputBecomesInput, Fn);
10820 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10821 OutputBecomesInput, Fn);
10822 break;
10823 case 16:
10824 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10825 OutputBecomesInput, Fn);
10826 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10827 OutputBecomesInput, Fn);
10828 break;
10829 case 32:
10830 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10831 OutputBecomesInput, Fn);
10832 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10833 OutputBecomesInput, Fn);
10834 break;
10835 case 64:
10836 case 128:
10837 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10838 OutputBecomesInput, Fn);
10839 break;
10840 default:
10841 llvm_unreachable("Scalar type is too wide.");
10842 }
10843}
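// Editorial illustration (not part of the original source): with NDS = 32 and
// one vector parameter, this adds both the 2-lane and 4-lane Advanced SIMD
// names, e.g. "_ZGVnN2v_foo" and "_ZGVnN4v_foo".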
10844
10845/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10846 static void emitAArch64DeclareSimdFunction(
10847 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10848 ArrayRef<ParamAttrTy> ParamAttrs,
10849 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10850 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10851
10852 // Get basic data for building the vector signature.
10853 const auto Data = getNDSWDS(FD, ParamAttrs);
10854 const unsigned NDS = std::get<0>(Data);
10855 const unsigned WDS = std::get<1>(Data);
10856 const bool OutputBecomesInput = std::get<2>(Data);
10857
10858 // Check the values provided via `simdlen` by the user.
10859 // 1. A `simdlen(1)` doesn't produce vector signatures.
10860 if (UserVLEN == 1) {
10861 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10862 DiagnosticsEngine::Warning,
10863 "The clause simdlen(1) has no effect when targeting aarch64.");
10864 CGM.getDiags().Report(SLoc, DiagID);
10865 return;
10866 }
10867
10868 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10869 // Advanced SIMD output.
10870 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10871 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10872 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10873 "power of 2 when targeting Advanced SIMD.");
10874 CGM.getDiags().Report(SLoc, DiagID);
10875 return;
10876 }
10877
10878 // 3. Section 3.4.1: an SVE fixed length must obey the architectural
10879 // limits.
10880 if (ISA == 's' && UserVLEN != 0) {
10881 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10882 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10883 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10884 "lanes in the architectural constraints "
10885 "for SVE (min is 128-bit, max is "
10886 "2048-bit, by steps of 128-bit)");
10887 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10888 return;
10889 }
10890 }
10891
10892 // Sort out parameter sequence.
10893 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10894 StringRef Prefix = "_ZGV";
10895 // Generate simdlen from user input (if any).
10896 if (UserVLEN) {
10897 if (ISA == 's') {
10898 // SVE generates only a masked function.
10899 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10900 OutputBecomesInput, Fn);
10901 } else {
10902 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10903 // Advanced SIMD generates one or two functions, depending on
10904 // the `[not]inbranch` clause.
10905 switch (State) {
10906 case OMPDeclareSimdDeclAttr::BS_Undefined:
10907 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10908 OutputBecomesInput, Fn);
10909 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10910 OutputBecomesInput, Fn);
10911 break;
10912 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10913 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10914 OutputBecomesInput, Fn);
10915 break;
10916 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10917 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10918 OutputBecomesInput, Fn);
10919 break;
10920 }
10921 }
10922 } else {
10923 // If no user simdlen is provided, follow the AAVFABI rules for
10924 // generating the vector length.
10925 if (ISA == 's') {
10926 // SVE, section 3.4.1, item 1.
10927 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10928 OutputBecomesInput, Fn);
10929 } else {
10930 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10931 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10932 // two vector names depending on the use of the clause
10933 // `[not]inbranch`.
10934 switch (State) {
10935 case OMPDeclareSimdDeclAttr::BS_Undefined:
10936 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10937 OutputBecomesInput, Fn);
10938 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10939 OutputBecomesInput, Fn);
10940 break;
10941 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10942 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10943 OutputBecomesInput, Fn);
10944 break;
10945 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10946 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10947 OutputBecomesInput, Fn);
10948 break;
10949 }
10950 }
10951 }
10952}
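// Editorial illustration (not part of the original source): without a user
// simdlen, the SVE path adds a single masked, scalable-VLEN name such as
// "_ZGVsMxv_foo", while Advanced SIMD falls back to the NDS-based variants
// shown above.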
10953
10954 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10955 llvm::Function *Fn) {
10956 ASTContext &C = CGM.getContext();
10957 FD = FD->getMostRecentDecl();
10958 while (FD) {
10959 // Map params to their positions in function decl.
10960 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10961 if (isa<CXXMethodDecl>(FD))
10962 ParamPositions.try_emplace(FD, 0);
10963 unsigned ParamPos = ParamPositions.size();
10964 for (const ParmVarDecl *P : FD->parameters()) {
10965 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10966 ++ParamPos;
10967 }
10968 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10969 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10970 // Mark uniform parameters.
10971 for (const Expr *E : Attr->uniforms()) {
10972 E = E->IgnoreParenImpCasts();
10973 unsigned Pos;
10974 if (isa<CXXThisExpr>(E)) {
10975 Pos = ParamPositions[FD];
10976 } else {
10977 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10978 ->getCanonicalDecl();
10979 auto It = ParamPositions.find(PVD);
10980 assert(It != ParamPositions.end() && "Function parameter not found");
10981 Pos = It->second;
10982 }
10983 ParamAttrs[Pos].Kind = Uniform;
10984 }
10985 // Get alignment info.
10986 auto *NI = Attr->alignments_begin();
10987 for (const Expr *E : Attr->aligneds()) {
10988 E = E->IgnoreParenImpCasts();
10989 unsigned Pos;
10990 QualType ParmTy;
10991 if (isa<CXXThisExpr>(E)) {
10992 Pos = ParamPositions[FD];
10993 ParmTy = E->getType();
10994 } else {
10995 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10996 ->getCanonicalDecl();
10997 auto It = ParamPositions.find(PVD);
10998 assert(It != ParamPositions.end() && "Function parameter not found");
10999 Pos = It->second;
11000 ParmTy = PVD->getType();
11001 }
11002 ParamAttrs[Pos].Alignment =
11003 (*NI)
11004 ? (*NI)->EvaluateKnownConstInt(C)
11005 : llvm::APSInt::getUnsigned(
11006 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11007 .getQuantity());
11008 ++NI;
11009 }
11010 // Mark linear parameters.
11011 auto *SI = Attr->steps_begin();
11012 auto *MI = Attr->modifiers_begin();
11013 for (const Expr *E : Attr->linears()) {
11014 E = E->IgnoreParenImpCasts();
11015 unsigned Pos;
11016 bool IsReferenceType = false;
11017 // Rescaling factor needed to compute the linear parameter
11018 // value in the mangled name.
11019 unsigned PtrRescalingFactor = 1;
11020 if (isa<CXXThisExpr>(E)) {
11021 Pos = ParamPositions[FD];
11022 auto *P = cast<PointerType>(E->getType());
11023 PtrRescalingFactor = CGM.getContext()
11024 .getTypeSizeInChars(P->getPointeeType())
11025 .getQuantity();
11026 } else {
11027 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11028 ->getCanonicalDecl();
11029 auto It = ParamPositions.find(PVD);
11030 assert(It != ParamPositions.end() && "Function parameter not found");
11031 Pos = It->second;
11032 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11033 PtrRescalingFactor = CGM.getContext()
11034 .getTypeSizeInChars(P->getPointeeType())
11035 .getQuantity();
11036 else if (PVD->getType()->isReferenceType()) {
11037 IsReferenceType = true;
11038 PtrRescalingFactor =
11039 CGM.getContext()
11040 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11041 .getQuantity();
11042 }
11043 }
11044 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11045 if (*MI == OMPC_LINEAR_ref)
11046 ParamAttr.Kind = LinearRef;
11047 else if (*MI == OMPC_LINEAR_uval)
11048 ParamAttr.Kind = LinearUVal;
11049 else if (IsReferenceType)
11050 ParamAttr.Kind = LinearVal;
11051 else
11052 ParamAttr.Kind = Linear;
11053 // Assuming a stride of 1, for `linear` without modifiers.
11054 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11055 if (*SI) {
11056 Expr::EvalResult Result;
11057 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11058 if (const auto *DRE =
11059 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11060 if (const auto *StridePVD =
11061 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11062 ParamAttr.HasVarStride = true;
11063 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11064 assert(It != ParamPositions.end() &&
11065 "Function parameter not found");
11066 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11067 }
11068 }
11069 } else {
11070 ParamAttr.StrideOrArg = Result.Val.getInt();
11071 }
11072 }
11073 // If we are using a linear clause on a pointer, we need to
11074 // rescale the value of linear_step with the byte size of the
11075 // pointee type.
11076 if (!ParamAttr.HasVarStride &&
11077 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11078 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11079 ++SI;
11080 ++MI;
11081 }
11082 llvm::APSInt VLENVal;
11083 SourceLocation ExprLoc;
11084 const Expr *VLENExpr = Attr->getSimdlen();
11085 if (VLENExpr) {
11086 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11087 ExprLoc = VLENExpr->getExprLoc();
11088 }
11089 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11090 if (CGM.getTriple().isX86()) {
11091 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11092 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11093 unsigned VLEN = VLENVal.getExtValue();
11094 StringRef MangledName = Fn->getName();
11095 if (CGM.getTarget().hasFeature("sve"))
11096 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11097 MangledName, 's', 128, Fn, ExprLoc);
11098 else if (CGM.getTarget().hasFeature("neon"))
11099 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11100 MangledName, 'n', 128, Fn, ExprLoc);
11101 }
11102 }
11103 FD = FD->getPreviousDecl();
11104 }
11105}
11106
11107namespace {
11108/// Cleanup action for doacross support.
11109class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11110public:
11111 static const int DoacrossFinArgs = 2;
11112
11113private:
11114 llvm::FunctionCallee RTLFn;
11115 llvm::Value *Args[DoacrossFinArgs];
11116
11117public:
11118 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11119 ArrayRef<llvm::Value *> CallArgs)
11120 : RTLFn(RTLFn) {
11121 assert(CallArgs.size() == DoacrossFinArgs);
11122 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11123 }
11124 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11125 if (!CGF.HaveInsertPoint())
11126 return;
11127 CGF.EmitRuntimeCall(RTLFn, Args);
11128 }
11129};
11130} // namespace
11131
11132 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11133 const OMPLoopDirective &D,
11134 ArrayRef<Expr *> NumIterations) {
11135 if (!CGF.HaveInsertPoint())
11136 return;
11137
11138 ASTContext &C = CGM.getContext();
11139 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11140 RecordDecl *RD;
11141 if (KmpDimTy.isNull()) {
11142 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11143 // kmp_int64 lo; // lower
11144 // kmp_int64 up; // upper
11145 // kmp_int64 st; // stride
11146 // };
11147 RD = C.buildImplicitRecord("kmp_dim");
11148 RD->startDefinition();
11149 addFieldToRecordDecl(C, RD, Int64Ty);
11150 addFieldToRecordDecl(C, RD, Int64Ty);
11151 addFieldToRecordDecl(C, RD, Int64Ty);
11152 RD->completeDefinition();
11153 KmpDimTy = C.getRecordType(RD);
11154 } else {
11155 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11156 }
11157 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11158 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11159 ArraySizeModifier::Normal, 0);
11160
11161 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11162 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11163 enum { LowerFD = 0, UpperFD, StrideFD };
11164 // Fill dims with data.
11165 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11166 LValue DimsLVal = CGF.MakeAddrLValue(
11167 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11168 // dims.upper = num_iterations;
11169 LValue UpperLVal = CGF.EmitLValueForField(
11170 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11171 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11172 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11173 Int64Ty, NumIterations[I]->getExprLoc());
11174 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11175 // dims.stride = 1;
11176 LValue StrideLVal = CGF.EmitLValueForField(
11177 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11178 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11179 StrideLVal);
11180 }
11181
11182 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11183 // kmp_int32 num_dims, struct kmp_dim * dims);
11184 llvm::Value *Args[] = {
11185 emitUpdateLocation(CGF, D.getBeginLoc()),
11186 getThreadID(CGF, D.getBeginLoc()),
11187 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11188 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11189 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11190 CGM.VoidPtrTy)};
11191
11192 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11193 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11194 CGF.EmitRuntimeCall(RTLFn, Args);
11195 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11196 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11197 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11198 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11199 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11200 llvm::ArrayRef(FiniArgs));
11201}
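// Editorial sketch (not part of the original source): for
// '#pragma omp for ordered(1)' with N iterations this emits, in essence,
//   kmp_dim dims = {/*lo=*/0, /*up=*/N, /*st=*/1};
//   __kmpc_doacross_init(&loc, gtid, /*num_dims=*/1, &dims);
// and pushes a cleanup that calls __kmpc_doacross_fini(&loc, gtid).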
11202
11203template <typename T>
11204 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11205 const T *C, llvm::Value *ULoc,
11206 llvm::Value *ThreadID) {
11207 QualType Int64Ty =
11208 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11209 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11210 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11211 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11212 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11213 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11214 const Expr *CounterVal = C->getLoopData(I);
11215 assert(CounterVal);
11216 llvm::Value *CntVal = CGF.EmitScalarConversion(
11217 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11218 CounterVal->getExprLoc());
11219 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11220 /*Volatile=*/false, Int64Ty);
11221 }
11222 llvm::Value *Args[] = {
11223 ULoc, ThreadID,
11224 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11225 llvm::FunctionCallee RTLFn;
11226 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11227 OMPDoacrossKind<T> ODK;
11228 if (ODK.isSource(C)) {
11229 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11230 OMPRTL___kmpc_doacross_post);
11231 } else {
11232 assert(ODK.isSink(C) && "Expect sink modifier.");
11233 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11234 OMPRTL___kmpc_doacross_wait);
11235 }
11236 CGF.EmitRuntimeCall(RTLFn, Args);
11237}
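// Editorial note (not part of the original source): a 'depend(source)' or
// 'doacross(source:)' clause lowers to __kmpc_doacross_post with the current
// iteration vector, while 'depend(sink: ...)'/'doacross(sink: ...)' lowers to
// __kmpc_doacross_wait on the computed vector.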
11238
11239 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11240 const OMPDependClause *C) {
11241 return EmitDoacrossOrdered<OMPDependClause>(
11242 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11243 getThreadID(CGF, C->getBeginLoc()));
11244}
11245
11246 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11247 const OMPDoacrossClause *C) {
11248 return EmitDoacrossOrdered<OMPDoacrossClause>(
11249 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11250 getThreadID(CGF, C->getBeginLoc()));
11251}
11252
11253 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11254 llvm::FunctionCallee Callee,
11255 ArrayRef<llvm::Value *> Args) const {
11256 assert(Loc.isValid() && "Outlined function call location must be valid.");
11257 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11258
11259 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11260 if (Fn->doesNotThrow()) {
11261 CGF.EmitNounwindRuntimeCall(Fn, Args);
11262 return;
11263 }
11264 }
11265 CGF.EmitRuntimeCall(Callee, Args);
11266}
11267
11268 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11269 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11270 ArrayRef<llvm::Value *> Args) const {
11271 emitCall(CGF, Loc, OutlinedFn, Args);
11272}
11273
11274 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11275 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11276 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11277 HasEmittedDeclareTargetRegion = true;
11278 }
11279
11280 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11281 const VarDecl *NativeParam,
11282 const VarDecl *TargetParam) const {
11283 return CGF.GetAddrOfLocalVar(NativeParam);
11284}
11285
11286 /// Return the allocator value from an expression, or a null allocator (the
11287 /// default when no allocator is specified).
11288static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11289 const Expr *Allocator) {
11290 llvm::Value *AllocVal;
11291 if (Allocator) {
11292 AllocVal = CGF.EmitScalarExpr(Allocator);
11293 // According to the standard, the original allocator type is an enum
11294 // (integer). Convert it to a pointer type, if required.
11295 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11296 CGF.getContext().VoidPtrTy,
11297 Allocator->getExprLoc());
11298 } else {
11299 // If no allocator specified, it defaults to the null allocator.
11300 AllocVal = llvm::Constant::getNullValue(
11301 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11302 }
11303 return AllocVal;
11304}
11305
11306/// Return the alignment from an allocate directive if present.
11307static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11308 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11309
11310 if (!AllocateAlignment)
11311 return nullptr;
11312
11313 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11314}
11315
11316 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11317 const VarDecl *VD) {
11318 if (!VD)
11319 return Address::invalid();
11320 Address UntiedAddr = Address::invalid();
11321 Address UntiedRealAddr = Address::invalid();
11322 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11323 if (It != FunctionToUntiedTaskStackMap.end()) {
11324 const UntiedLocalVarsAddressesMap &UntiedData =
11325 UntiedLocalVarsStack[It->second];
11326 auto I = UntiedData.find(VD);
11327 if (I != UntiedData.end()) {
11328 UntiedAddr = I->second.first;
11329 UntiedRealAddr = I->second.second;
11330 }
11331 }
11332 const VarDecl *CVD = VD->getCanonicalDecl();
11333 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11334 // Use the default allocation.
11335 if (!isAllocatableDecl(VD))
11336 return UntiedAddr;
11337 llvm::Value *Size;
11338 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11339 if (CVD->getType()->isVariablyModifiedType()) {
11340 Size = CGF.getTypeSize(CVD->getType());
11341 // Align the size: ((size + align - 1) / align) * align
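// Editorial example: size = 10, align = 8 gives ((10 + 8 - 1) / 8) * 8 = 16.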
11342 Size = CGF.Builder.CreateNUWAdd(
11343 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11344 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11345 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11346 } else {
11347 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11348 Size = CGM.getSize(Sz.alignTo(Align));
11349 }
11350 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11351 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11352 const Expr *Allocator = AA->getAllocator();
11353 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11354 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11355 SmallVector<llvm::Value *, 4> Args;
11356 Args.push_back(ThreadID);
11357 if (Alignment)
11358 Args.push_back(Alignment);
11359 Args.push_back(Size);
11360 Args.push_back(AllocVal);
11361 llvm::omp::RuntimeFunction FnID =
11362 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11363 llvm::Value *Addr = CGF.EmitRuntimeCall(
11364 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11365 getName({CVD->getName(), ".void.addr"}));
11366 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11367 CGM.getModule(), OMPRTL___kmpc_free);
11368 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11369 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11370 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11371 if (UntiedAddr.isValid())
11372 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11373
11374 // Cleanup action for allocate support.
11375 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11376 llvm::FunctionCallee RTLFn;
11377 SourceLocation::UIntTy LocEncoding;
11378 Address Addr;
11379 const Expr *AllocExpr;
11380
11381 public:
11382 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11383 SourceLocation::UIntTy LocEncoding, Address Addr,
11384 const Expr *AllocExpr)
11385 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11386 AllocExpr(AllocExpr) {}
11387 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11388 if (!CGF.HaveInsertPoint())
11389 return;
11390 llvm::Value *Args[3];
11391 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11392 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11393 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11394 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11395 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11396 Args[2] = AllocVal;
11397 CGF.EmitRuntimeCall(RTLFn, Args);
11398 }
11399 };
11400 Address VDAddr =
11401 UntiedRealAddr.isValid()
11402 ? UntiedRealAddr
11403 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11404 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11405 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11406 VDAddr, Allocator);
11407 if (UntiedRealAddr.isValid())
11408 if (auto *Region =
11409 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11410 Region->emitUntiedSwitch(CGF);
11411 return VDAddr;
11412 }
11413 return UntiedAddr;
11414}
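// Editorial sketch (not part of the original source): for a variably modified
// 'int a[n];' under '#pragma omp allocate(a) allocator(omp_default_mem_alloc)'
// the emitted code is, in essence,
//   void *p = __kmpc_alloc(gtid, aligned_size, allocator);
// with a matching cleanup
//   __kmpc_free(gtid, p, allocator);
// pushed onto the EH stack.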
11415
11416 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11417 const VarDecl *VD) const {
11418 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11419 if (It == FunctionToUntiedTaskStackMap.end())
11420 return false;
11421 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11422}
11423
11424 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11425 CodeGenModule &CGM, const OMPLoopDirective &S)
11426 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11427 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11428 if (!NeedToPush)
11429 return;
11430 NontemporalDeclsSet &DS =
11431 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11432 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11433 for (const Stmt *Ref : C->private_refs()) {
11434 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11435 const ValueDecl *VD;
11436 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11437 VD = DRE->getDecl();
11438 } else {
11439 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11440 assert((ME->isImplicitCXXThis() ||
11441 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11442 "Expected member of current class.");
11443 VD = ME->getMemberDecl();
11444 }
11445 DS.insert(VD);
11446 }
11447 }
11448}
11449
11450 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11451 if (!NeedToPush)
11452 return;
11453 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11454 }
11455
11456 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11457 CodeGenFunction &CGF,
11458 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11459 std::pair<Address, Address>> &LocalVars)
11460 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11461 if (!NeedToPush)
11462 return;
11463 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11464 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11465 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11466}
11467
11468 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11469 if (!NeedToPush)
11470 return;
11471 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11472 }
11473
11474 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11475 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11476
11477 return llvm::any_of(
11478 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11479 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11480}
11481
11482void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11483 const OMPExecutableDirective &S,
11484 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11485 const {
11486 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11487 // Vars in target/task regions must be excluded completely.
11488 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11489 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11490 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11491 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11492 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11493 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11494 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11495 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11496 }
11497 }
11498 // Exclude vars in private clauses.
11499 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11500 for (const Expr *Ref : C->varlists()) {
11501 if (!Ref->getType()->isScalarType())
11502 continue;
11503 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11504 if (!DRE)
11505 continue;
11506 NeedToCheckForLPCs.insert(DRE->getDecl());
11507 }
11508 }
11509 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11510 for (const Expr *Ref : C->varlists()) {
11511 if (!Ref->getType()->isScalarType())
11512 continue;
11513 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11514 if (!DRE)
11515 continue;
11516 NeedToCheckForLPCs.insert(DRE->getDecl());
11517 }
11518 }
11519 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11520 for (const Expr *Ref : C->varlists()) {
11521 if (!Ref->getType()->isScalarType())
11522 continue;
11523 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11524 if (!DRE)
11525 continue;
11526 NeedToCheckForLPCs.insert(DRE->getDecl());
11527 }
11528 }
11529 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11530 for (const Expr *Ref : C->varlists()) {
11531 if (!Ref->getType()->isScalarType())
11532 continue;
11533 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11534 if (!DRE)
11535 continue;
11536 NeedToCheckForLPCs.insert(DRE->getDecl());
11537 }
11538 }
11539 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11540 for (const Expr *Ref : C->varlists()) {
11541 if (!Ref->getType()->isScalarType())
11542 continue;
11543 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11544 if (!DRE)
11545 continue;
11546 NeedToCheckForLPCs.insert(DRE->getDecl());
11547 }
11548 }
11549 for (const Decl *VD : NeedToCheckForLPCs) {
11550 for (const LastprivateConditionalData &Data :
11551 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11552 if (Data.DeclToUniqueName.count(VD) > 0) {
11553 if (!Data.Disabled)
11554 NeedToAddForLPCsAsDisabled.insert(VD);
11555 break;
11556 }
11557 }
11558 }
11559}
11560
11561CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11562 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11563 : CGM(CGF.CGM),
11564 Action((CGM.getLangOpts().OpenMP >= 50 &&
11565 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11566 [](const OMPLastprivateClause *C) {
11567 return C->getKind() ==
11568 OMPC_LASTPRIVATE_conditional;
11569 }))
11570 ? ActionToDo::PushAsLastprivateConditional
11571 : ActionToDo::DoNotPush) {
11572 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11573 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11574 return;
11575 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11576 "Expected a push action.");
11577 LastprivateConditionalData &Data =
11578 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11579 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11580 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11581 continue;
11582
11583 for (const Expr *Ref : C->varlists()) {
11584 Data.DeclToUniqueName.insert(std::make_pair(
11585 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11586 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11587 }
11588 }
11589 Data.IVLVal = IVLVal;
11590 Data.Fn = CGF.CurFn;
11591}
11592
11593CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11594 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11595 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11596 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11597 if (CGM.getLangOpts().OpenMP < 50)
11598 return;
11599 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11600 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11601 if (!NeedToAddForLPCsAsDisabled.empty()) {
11602 Action = ActionToDo::DisableLastprivateConditional;
11603 LastprivateConditionalData &Data =
11604 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11605 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11606 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11607 Data.Fn = CGF.CurFn;
11608 Data.Disabled = true;
11609 }
11610}
11611
11612 CGOpenMPRuntime::LastprivateConditionalRAII
11613 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11614 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11615 return LastprivateConditionalRAII(CGF, S);
11616}
11617
11618 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11619 if (CGM.getLangOpts().OpenMP < 50)
11620 return;
11621 if (Action == ActionToDo::DisableLastprivateConditional) {
11622 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11623 "Expected list of disabled private vars.");
11624 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11625 }
11626 if (Action == ActionToDo::PushAsLastprivateConditional) {
11627 assert(
11628 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11629 "Expected list of lastprivate conditional vars.");
11630 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11631 }
11632}
11633
11634 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11635 const VarDecl *VD) {
11636 ASTContext &C = CGM.getContext();
11637 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11638 if (I == LastprivateConditionalToTypes.end())
11639 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11640 QualType NewType;
11641 const FieldDecl *VDField;
11642 const FieldDecl *FiredField;
11643 LValue BaseLVal;
11644 auto VI = I->getSecond().find(VD);
11645 if (VI == I->getSecond().end()) {
11646 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
11647 RD->startDefinition();
11648 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11649 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11650 RD->completeDefinition();
11651 NewType = C.getRecordType(RD);
11652 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11653 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11654 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11655 } else {
11656 NewType = std::get<0>(VI->getSecond());
11657 VDField = std::get<1>(VI->getSecond());
11658 FiredField = std::get<2>(VI->getSecond());
11659 BaseLVal = std::get<3>(VI->getSecond());
11660 }
11661 LValue FiredLVal =
11662 CGF.EmitLValueForField(BaseLVal, FiredField);
11663 CGF.EmitStoreOfScalar(
11664 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11665 FiredLVal);
11666 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
11667}
11668
11669namespace {
11670/// Checks if the lastprivate conditional variable is referenced in LHS.
11671class LastprivateConditionalRefChecker final
11672 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11673 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11674 const Expr *FoundE = nullptr;
11675 const Decl *FoundD = nullptr;
11676 StringRef UniqueDeclName;
11677 LValue IVLVal;
11678 llvm::Function *FoundFn = nullptr;
11679 SourceLocation Loc;
11680
11681public:
11682 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11683 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11684 llvm::reverse(LPM)) {
11685 auto It = D.DeclToUniqueName.find(E->getDecl());
11686 if (It == D.DeclToUniqueName.end())
11687 continue;
11688 if (D.Disabled)
11689 return false;
11690 FoundE = E;
11691 FoundD = E->getDecl()->getCanonicalDecl();
11692 UniqueDeclName = It->second;
11693 IVLVal = D.IVLVal;
11694 FoundFn = D.Fn;
11695 break;
11696 }
11697 return FoundE == E;
11698 }
11699 bool VisitMemberExpr(const MemberExpr *E) {
11700 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11701 return false;
11702 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11703 llvm::reverse(LPM)) {
11704 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11705 if (It == D.DeclToUniqueName.end())
11706 continue;
11707 if (D.Disabled)
11708 return false;
11709 FoundE = E;
11710 FoundD = E->getMemberDecl()->getCanonicalDecl();
11711 UniqueDeclName = It->second;
11712 IVLVal = D.IVLVal;
11713 FoundFn = D.Fn;
11714 break;
11715 }
11716 return FoundE == E;
11717 }
11718 bool VisitStmt(const Stmt *S) {
11719 for (const Stmt *Child : S->children()) {
11720 if (!Child)
11721 continue;
11722 if (const auto *E = dyn_cast<Expr>(Child))
11723 if (!E->isGLValue())
11724 continue;
11725 if (Visit(Child))
11726 return true;
11727 }
11728 return false;
11729 }
11730 explicit LastprivateConditionalRefChecker(
11732 : LPM(LPM) {}
11733 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11734 getFoundData() const {
11735 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11736 }
11737};
11738} // namespace
11739
11740 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11741 LValue IVLVal,
11742 StringRef UniqueDeclName,
11743 LValue LVal,
11744 SourceLocation Loc) {
11745 // Last updated loop counter for the lastprivate conditional var.
11746 // int<xx> last_iv = 0;
11747 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11748 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11749 LLIVTy, getName({UniqueDeclName, "iv"}));
11750 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11751 IVLVal.getAlignment().getAsAlign());
11752 LValue LastIVLVal =
11753 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
11754
11755 // Last value of the lastprivate conditional.
11756 // decltype(priv_a) last_a;
11757 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11758 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11759 cast<llvm::GlobalVariable>(Last)->setAlignment(
11760 LVal.getAlignment().getAsAlign());
11761 LValue LastLVal =
11762 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11763
11764 // Global loop counter. Required to handle inner parallel-for regions.
11765 // iv
11766 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11767
11768 // #pragma omp critical(a)
11769 // if (last_iv <= iv) {
11770 // last_iv = iv;
11771 // last_a = priv_a;
11772 // }
11773 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11774 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11775 Action.Enter(CGF);
11776 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11777 // Check whether the variable was updated (last_iv <= iv) and, if so,
11778 // store the new value in the global var.
11779 llvm::Value *CmpRes;
11780 if (IVLVal.getType()->isSignedIntegerType()) {
11781 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11782 } else {
11783 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11784 "Loop iteration variable must be integer.");
11785 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11786 }
11787 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11788 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11789 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11790 // {
11791 CGF.EmitBlock(ThenBB);
11792
11793 // last_iv = iv;
11794 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11795
11796 // last_a = priv_a;
11797 switch (CGF.getEvaluationKind(LVal.getType())) {
11798 case TEK_Scalar: {
11799 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11800 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11801 break;
11802 }
11803 case TEK_Complex: {
11804 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11805 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11806 break;
11807 }
11808 case TEK_Aggregate:
11809 llvm_unreachable(
11810 "Aggregates are not supported in lastprivate conditional.");
11811 }
11812 // }
11813 CGF.EmitBranch(ExitBB);
11814 // There is no need to emit a line number for an unconditional branch.
11815 (void)ApplyDebugLocation::CreateEmpty(CGF);
11816 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11817 };
11818
11819 if (CGM.getLangOpts().OpenMPSimd) {
11820 // Do not emit as a critical region, since no parallel region could have been emitted.
11821 RegionCodeGenTy ThenRCG(CodeGen);
11822 ThenRCG(CGF);
11823 } else {
11824 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11825 }
11826}
11827
11828 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11829 const Expr *LHS) {
11830 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11831 return;
11832 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11833 if (!Checker.Visit(LHS))
11834 return;
11835 const Expr *FoundE;
11836 const Decl *FoundD;
11837 StringRef UniqueDeclName;
11838 LValue IVLVal;
11839 llvm::Function *FoundFn;
11840 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11841 Checker.getFoundData();
11842 if (FoundFn != CGF.CurFn) {
11843 // Special codegen for inner parallel regions.
11844 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11845 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11846 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11847 "Lastprivate conditional is not found in outer region.");
11848 QualType StructTy = std::get<0>(It->getSecond());
11849 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11850 LValue PrivLVal = CGF.EmitLValue(FoundE);
11851 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11852 PrivLVal.getAddress(),
11853 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11854 CGF.ConvertTypeForMem(StructTy));
11855 LValue BaseLVal =
11856 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11857 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11858 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11859 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11860 FiredLVal, llvm::AtomicOrdering::Unordered,
11861 /*IsVolatile=*/true, /*isInit=*/false);
11862 return;
11863 }
11864
11865 // Private address of the lastprivate conditional in the current context.
11866 // priv_a
11867 LValue LVal = CGF.EmitLValue(FoundE);
11868 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11869 FoundE->getExprLoc());
11870}
11871
11872 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11873 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11874 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11875 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11876 return;
11877 auto Range = llvm::reverse(LastprivateConditionalStack);
11878 auto It = llvm::find_if(
11879 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11880 if (It == Range.end() || It->Fn != CGF.CurFn)
11881 return;
11882 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11883 assert(LPCI != LastprivateConditionalToTypes.end() &&
11884 "Lastprivates must be registered already.");
11885 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11886 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11887 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11888 for (const auto &Pair : It->DeclToUniqueName) {
11889 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11890 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11891 continue;
11892 auto I = LPCI->getSecond().find(Pair.first);
11893 assert(I != LPCI->getSecond().end() &&
11894 "Lastprivate must be registered already.");
11895 // bool Cmp = priv_a.Fired != 0;
11896 LValue BaseLVal = std::get<3>(I->getSecond());
11897 LValue FiredLVal =
11898 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11899 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11900 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11901 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11902 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11903 // if (Cmp) {
11904 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11905 CGF.EmitBlock(ThenBB);
11906 Address Addr = CGF.GetAddrOfLocalVar(VD);
11907 LValue LVal;
11908 if (VD->getType()->isReferenceType())
11909 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11910 AlignmentSource::Decl);
11911 else
11912 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11913 AlignmentSource::Decl);
11914 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11915 D.getBeginLoc());
11916 (void)ApplyDebugLocation::CreateEmpty(CGF);
11917 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11918 // }
11919 }
11920}
11921
11922 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11923 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11924 SourceLocation Loc) {
11925 if (CGF.getLangOpts().OpenMP < 50)
11926 return;
11927 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11928 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11929 "Unknown lastprivate conditional variable.");
11930 StringRef UniqueName = It->second;
11931 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11932 // The variable was not updated in the region - exit.
11933 if (!GV)
11934 return;
11935 LValue LPLVal = CGF.MakeRawAddrLValue(
11936 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11937 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11938 CGF.EmitStoreOfScalar(Res, PrivLVal);
11939}
11940
11941 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11942 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11943 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11944 const RegionCodeGenTy &CodeGen) {
11945 llvm_unreachable("Not supported in SIMD-only mode");
11946}
11947
11948 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11949 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11950 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11951 const RegionCodeGenTy &CodeGen) {
11952 llvm_unreachable("Not supported in SIMD-only mode");
11953}
11954
11955 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
11956 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
11957 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
11958 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
11959 bool Tied, unsigned &NumberOfParts) {
11960 llvm_unreachable("Not supported in SIMD-only mode");
11961}
11962
11963 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
11964 SourceLocation Loc,
11965 llvm::Function *OutlinedFn,
11966 ArrayRef<llvm::Value *> CapturedVars,
11967 const Expr *IfCond,
11968 llvm::Value *NumThreads) {
11969 llvm_unreachable("Not supported in SIMD-only mode");
11970}
11971
11972 void CGOpenMPSIMDRuntime::emitCriticalRegion(
11973 CodeGenFunction &CGF, StringRef CriticalName,
11974 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
11975 const Expr *Hint) {
11976 llvm_unreachable("Not supported in SIMD-only mode");
11977}
11978
11979 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
11980 const RegionCodeGenTy &MasterOpGen,
11981 SourceLocation Loc) {
11982 llvm_unreachable("Not supported in SIMD-only mode");
11983}
11984
11985 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
11986 const RegionCodeGenTy &MasterOpGen,
11987 SourceLocation Loc,
11988 const Expr *Filter) {
11989 llvm_unreachable("Not supported in SIMD-only mode");
11990}
11991
11992 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
11993 SourceLocation Loc) {
11994 llvm_unreachable("Not supported in SIMD-only mode");
11995}
11996
11997 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
11998 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
11999 SourceLocation Loc) {
12000 llvm_unreachable("Not supported in SIMD-only mode");
12001}
12002
12003 void CGOpenMPSIMDRuntime::emitSingleRegion(
12004 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12005 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12006 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12007 ArrayRef<const Expr *> AssignmentOps) {
12008 llvm_unreachable("Not supported in SIMD-only mode");
12009}
12010
12011 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12012 const RegionCodeGenTy &OrderedOpGen,
12013 SourceLocation Loc,
12014 bool IsThreads) {
12015 llvm_unreachable("Not supported in SIMD-only mode");
12016}
12017
12018 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12019 SourceLocation Loc,
12020 OpenMPDirectiveKind Kind,
12021 bool EmitChecks,
12022 bool ForceSimpleCall) {
12023 llvm_unreachable("Not supported in SIMD-only mode");
12024}
12025
12026 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12027 CodeGenFunction &CGF, SourceLocation Loc,
12028 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12029 bool Ordered, const DispatchRTInput &DispatchValues) {
12030 llvm_unreachable("Not supported in SIMD-only mode");
12031}
12032
12033 void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
12034 SourceLocation Loc) {
12035 llvm_unreachable("Not supported in SIMD-only mode");
12036}
12037
12038 void CGOpenMPSIMDRuntime::emitForStaticInit(
12039 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12040 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12041 llvm_unreachable("Not supported in SIMD-only mode");
12042}
12043
12044 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12045 CodeGenFunction &CGF, SourceLocation Loc,
12046 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12047 llvm_unreachable("Not supported in SIMD-only mode");
12048}
12049
12050 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12051 SourceLocation Loc,
12052 unsigned IVSize,
12053 bool IVSigned) {
12054 llvm_unreachable("Not supported in SIMD-only mode");
12055}
12056
12057 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12058 SourceLocation Loc,
12059 OpenMPDirectiveKind DKind) {
12060 llvm_unreachable("Not supported in SIMD-only mode");
12061}
12062
12063 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12064 SourceLocation Loc,
12065 unsigned IVSize, bool IVSigned,
12066 Address IL, Address LB,
12067 Address UB, Address ST) {
12068 llvm_unreachable("Not supported in SIMD-only mode");
12069}
12070
12071 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12072 llvm::Value *NumThreads,
12073 SourceLocation Loc) {
12074 llvm_unreachable("Not supported in SIMD-only mode");
12075}
12076
12077 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12078 ProcBindKind ProcBind,
12079 SourceLocation Loc) {
12080 llvm_unreachable("Not supported in SIMD-only mode");
12081}
12082
12083 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12084 const VarDecl *VD,
12085 Address VDAddr,
12086 SourceLocation Loc) {
12087 llvm_unreachable("Not supported in SIMD-only mode");
12088}
12089
12090 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12091 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12092 CodeGenFunction *CGF) {
12093 llvm_unreachable("Not supported in SIMD-only mode");
12094}
12095
12096 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12097 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12098 llvm_unreachable("Not supported in SIMD-only mode");
12099}
12100
12101 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12102 ArrayRef<const Expr *> Vars,
12103 SourceLocation Loc,
12104 llvm::AtomicOrdering AO) {
12105 llvm_unreachable("Not supported in SIMD-only mode");
12106}
12107
12108 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12109 const OMPExecutableDirective &D,
12110 llvm::Function *TaskFunction,
12111 QualType SharedsTy, Address Shareds,
12112 const Expr *IfCond,
12113 const OMPTaskDataTy &Data) {
12114 llvm_unreachable("Not supported in SIMD-only mode");
12115}
12116
12117 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12118 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12119 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12120 const Expr *IfCond, const OMPTaskDataTy &Data) {
12121 llvm_unreachable("Not supported in SIMD-only mode");
12122}
12123
12124 void CGOpenMPSIMDRuntime::emitReduction(
12125 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12126 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12127 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12128 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12129 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12130 ReductionOps, Options);
12131}
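CGOpenMPSIMDRuntime forwards only simple reductions to the base implementation; every other entry point in this class traps, because -fopenmp-simd honors just the simd subset of OpenMP and never calls the libomp runtime. A minimal sketch that stays inside that subset (assumed usage, not from this file):

  // Builds with: clang -fopenmp-simd dot.cpp  (no libomp dependency)
  double dot(const double *x, const double *y, int n) {
    double sum = 0.0;
  #pragma omp simd reduction(+ : sum)
    for (int i = 0; i < n; ++i)
      sum += x[i] * y[i];
    return sum; // lowered as a "simple" reduction by the override above
  }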
12132
12133 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12134 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12135 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12136 llvm_unreachable("Not supported in SIMD-only mode");
12137}
12138
12139 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12140 SourceLocation Loc,
12141 bool IsWorksharingReduction) {
12142 llvm_unreachable("Not supported in SIMD-only mode");
12143}
12144
12145 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12146 SourceLocation Loc,
12147 ReductionCodeGen &RCG,
12148 unsigned N) {
12149 llvm_unreachable("Not supported in SIMD-only mode");
12150}
12151
12152 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12153 SourceLocation Loc,
12154 llvm::Value *ReductionsPtr,
12155 LValue SharedLVal) {
12156 llvm_unreachable("Not supported in SIMD-only mode");
12157}
12158
12159 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12160 SourceLocation Loc,
12161 const OMPTaskDataTy &Data) {
12162 llvm_unreachable("Not supported in SIMD-only mode");
12163}
12164
12165 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12166 CodeGenFunction &CGF, SourceLocation Loc,
12167 OpenMPDirectiveKind CancelRegion) {
12168 llvm_unreachable("Not supported in SIMD-only mode");
12169}
12170
12171 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12172 SourceLocation Loc, const Expr *IfCond,
12173 OpenMPDirectiveKind CancelRegion) {
12174 llvm_unreachable("Not supported in SIMD-only mode");
12175}
12176
12177 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12178 const OMPExecutableDirective &D, StringRef ParentName,
12179 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12180 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12181 llvm_unreachable("Not supported in SIMD-only mode");
12182}
12183
12184 void CGOpenMPSIMDRuntime::emitTargetCall(
12185 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12186 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12187 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12188 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12189 const OMPLoopDirective &D)>
12190 SizeEmitter) {
12191 llvm_unreachable("Not supported in SIMD-only mode");
12192}
12193
12194 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12195 llvm_unreachable("Not supported in SIMD-only mode");
12196}
12197
12198 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12199 llvm_unreachable("Not supported in SIMD-only mode");
12200}
12201
12202 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12203 return false;
12204}
12205
12206 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12207 const OMPExecutableDirective &D,
12208 SourceLocation Loc,
12209 llvm::Function *OutlinedFn,
12210 ArrayRef<llvm::Value *> CapturedVars) {
12211 llvm_unreachable("Not supported in SIMD-only mode");
12212}
12213
12214 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12215 const Expr *NumTeams,
12216 const Expr *ThreadLimit,
12217 SourceLocation Loc) {
12218 llvm_unreachable("Not supported in SIMD-only mode");
12219}
12220
12221 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12222 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12223 const Expr *Device, const RegionCodeGenTy &CodeGen,
12224 CGOpenMPRuntime::TargetDataInfo &Info) {
12225 llvm_unreachable("Not supported in SIMD-only mode");
12226}
12227
12228 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12229 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12230 const Expr *Device) {
12231 llvm_unreachable("Not supported in SIMD-only mode");
12232}
12233
12234 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12235 const OMPLoopDirective &D,
12236 ArrayRef<Expr *> NumIterations) {
12237 llvm_unreachable("Not supported in SIMD-only mode");
12238}
12239
12240 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12241 const OMPDependClause *C) {
12242 llvm_unreachable("Not supported in SIMD-only mode");
12243}
12244
12245 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12246 const OMPDoacrossClause *C) {
12247 llvm_unreachable("Not supported in SIMD-only mode");
12248}
12249
12250 const VarDecl *
12251 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12252 const VarDecl *NativeParam) const {
12253 llvm_unreachable("Not supported in SIMD-only mode");
12254}
12255
12256 Address
12257 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12258 const VarDecl *NativeParam,
12259 const VarDecl *TargetParam) const {
12260 llvm_unreachable("Not supported in SIMD-only mode");
12261}
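For orientation, the choice between the full runtime and this SIMD-only variant is made once, when the module's OpenMP runtime object is created. A simplified sketch of that dispatch, under the assumption that only host targets are involved (the real logic lives in CodeGenModule.cpp and also selects GPU runtimes for device targets):

  // Sketch only; see CodeGenModule::createOpenMPRuntime() for the real code.
  // if (LangOpts.OpenMPSimd)                              // -fopenmp-simd
  //   OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this));
  // else                                                  // -fopenmp
  //   OpenMPRuntime.reset(new CGOpenMPRuntime(*this));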
#define V(N, I)
Definition: ASTContext.h:3338
StringRef P
#define SM(sm)
Definition: Cuda.cpp:83
Provides LLVM's BitmaskEnum facility to enumeration types declared in namespace clang.
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
llvm::Value * emitDynCGGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, const Expr *Ref)
Generates unique name for artificial threadprivate variables.
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
const Decl * D
Expr * E
enum clang::sema::@1651::IndirectLocalPathEntry::EntryKind Kind
Defines the clang::FileManager interface and associated types.
int Priority
Definition: Format.cpp:2993
#define X(type, name)
Definition: Value.h:143
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
SourceRange Range
Definition: SemaObjC.cpp:757
SourceLocation Loc
Definition: SemaObjC.cpp:758
Defines the SourceManager interface.
const char * Data
This file defines OpenMP AST classes for executable directives and clauses.
SourceLocation Begin
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:186
SourceManager & getSourceManager()
Definition: ASTContext.h:720
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2822
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2641
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
Definition: ASTContext.h:1145
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition: ASTContext.h:796
CanQualType BoolTy
Definition: ASTContext.h:1119
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1118
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2825
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:778
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
Definition: RecordLayout.h:210
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:5086
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3540
Attr - This represents one attribute.
Definition: Attr.h:42
Represents a C++ constructor within a class.
Definition: DeclCXX.h:2535
Represents a C++ destructor within a class.
Definition: DeclCXX.h:2799
Represents a static or instance method of a struct/union/class.
Definition: DeclCXX.h:2060
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition: DeclCXX.h:2186
QualType getFunctionObjectParameterType() const
Definition: DeclCXX.h:2210
Represents a C++ struct/union/class.
Definition: DeclCXX.h:258
base_class_range bases()
Definition: DeclCXX.h:619
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition: DeclCXX.h:1022
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition: DeclCXX.cpp:1644
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition: DeclCXX.h:613
base_class_range vbases()
Definition: DeclCXX.h:636
capture_const_range captures() const
Definition: DeclCXX.h:1101
ctor_range ctors() const
Definition: DeclCXX.h:681
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition: DeclCXX.cpp:1978
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Definition: Redeclarable.h:349
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:3775
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:3809
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1305
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:3815
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition: Stmt.h:3803
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:3806
This captures a statement into a function.
Definition: Stmt.h:3762
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition: Stmt.h:3913
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition: Stmt.h:3883
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:3866
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition: Stmt.cpp:1431
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition: Stmt.h:3908
capture_range captures()
Definition: Stmt.h:3900
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition: CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition: CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition: Address.h:259
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
bool isValid() const
Definition: Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:892
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:899
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:909
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:156
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:135
llvm::Value * CreateIsNull(Address Addr, const Twine &Name="")
Definition: CGBuilder.h:354
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:291
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:202
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition: CGBuilder.h:240
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:107
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:363
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:277
MangleContext & getMangleContext()
Gets the mangle context.
Definition: CGCXXABI.h:113
CGFunctionInfo - Class to encapsulate the information about a function definition.
Manages list of lastprivate conditional decls for the specified directive.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::FunctionType * Kmpc_MicroTy
The type for a microtask which gets passed to __kmpc_fork_call().
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of weather a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of weather a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
void emitUDMapperArrayInitOrDel(CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *BasePtr, llvm::Value *Ptr, llvm::Value *Size, llvm::Value *MapType, llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit)
Emit the array initialization or deletion portion for user-defined mapper code generation.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits Callee function call with arguments Args with location Loc.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /‍**< pointer to block of pointers to shared vars ‍/ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Helper to determine the min/max number of threads/teams for D.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
llvm::SmallVector< NontemporalDeclsSet, 4 > NontemporalDeclsStack
Stack for list of declarations in current context marked as nontemporal.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of weather a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outilined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit a code for initialization of threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit code for the reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates an artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for a teams call of the OutlinedFn with variables captured in a record whose address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for the reduction clause with the task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for the initialization of the task reduction clause.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override
Emits code for a parallel or serial call of the OutlinedFn with variables captured in a record whose ad...
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for the 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
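This emitter follows the 'push' pattern shared with the num_teams and proc_bind emitters above: a call staged immediately before the fork so the runtime picks the value up. A sketch from the prototype quoted in the brief, with the surrounding plumbing assumed:
  extern "C" void __kmpc_push_num_threads(void *loc, int global_tid, int num_threads);
  // For '#pragma omp parallel num_threads(N)', codegen conceptually emits:
  //   __kmpc_push_num_threads(&loc, gtid, N);
  //   ... followed by the fork call that runs the outlined parallel region.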
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
CGRecordLayout - This class handles struct and union layout info while lowering AST types to LLVM typ...
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element-by-element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
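A minimal model of that element-by-element copy, with CopyGen reduced to a callback over one destination/source element pair; all names here are illustrative:
  #include <cstddef>
  #include <functional>
  void aggregate_assign(int *dest, const int *src, size_t n,
                        const std::function<void(int *, const int *)> &copy_gen) {
    for (size_t i = 0; i != n; ++i)
      copy_gen(&dest[i], &src[i]); // CopyGen invoked per flattened element
  }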
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
CGCapturedStmtInfo * CapturedStmtInfo
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
void EmitVariablyModifiedType(QualType Ty)
EmitVariablyModifiedType - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
const CodeGen::CGBlockInfo * BlockInfo
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
llvm::Type * ConvertTypeForMem(QualType T)
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPt - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
JumpDest ReturnBlock
ReturnBlock - Unified return block.
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
llvm::Value * getTypeSize(QualType Ty)
Returns the calculated size of the specified type.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
void EmitAutoVarCleanups(const AutoVarEmission &emission)
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
CodeGenTypes & getTypes() const
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
LValue EmitStringLiteralLValue(const StringLiteral *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
Definition: NVPTX.cpp:314
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void handleAMDGPUWavesPerEUAttr(llvm::Function *F, const AMDGPUWavesPerEUAttr *A)
Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to F.
Definition: AMDGPU.cpp:671
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
const TargetInfo & getTarget() const
void EmitGlobal(GlobalDecl D)
Emit code for a single global function or var decl.
void handleAMDGPUFlatWorkGroupSizeAttr(llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, const ReqdWorkGroupSizeAttr *ReqdWGS=nullptr, int32_t *MinThreadsVal=nullptr, int32_t *MaxThreadsVal=nullptr)
Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute to F.
Definition: AMDGPU.cpp:644
llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD)
Returns LLVM linkage for a declarator.
CGCXXABI & getCXXABI() const
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
const llvm::Triple & getTriple() const
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
llvm::Constant * GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition=NotForDefinition)
ASTContext & getContext() const
const TargetCodeGenInfo & getTargetCodeGenInfo()
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition: CGDecl.cpp:2847
llvm::GlobalValue * GetGlobalValue(StringRef Ref)
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Function * CreateGlobalInitOrCleanUpFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false, llvm::GlobalVariable::LinkageTypes Linkage=llvm::GlobalVariable::InternalLinkage)
Definition: CGDeclCXX.cpp:443
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition: CGCall.cpp:1632
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:680
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:722
A specialization of Address that requires the address to be an LLVM Constant.
Definition: Address.h:294
static ConstantAddress invalid()
Definition: Address.h:302
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:631
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:243
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:368
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
CharUnits getAlignment() const
Definition: CGValue.h:343
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition: CGValue.h:338
Address getAddress() const
Definition: CGValue.h:361
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:346
QualType getType() const
Definition: CGValue.h:291
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:335
A basic class for pre- and post-actions in advanced codegen sequences for OpenMP regions.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:77
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
bool isValid() const
Definition: Address.h:62
Class intended to support codegen for all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used in place of the original variable address in normal operations.
Class that provides a way to call the simple version of codegen for an OpenMP region, or an advanced one with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const
setTargetAttributes - Provides a convenient hook to handle extra target-specific attributes for the g...
Definition: TargetInfo.h:76
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition: StmtVisitor.h:195
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1425
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1742
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1265
ValueDecl * getDecl()
Definition: Expr.h:1333
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:86
SourceLocation getEndLoc() const LLVM_READONLY
Definition: DeclBase.h:441
T * getAttr() const
Definition: DeclBase.h:579
bool hasAttrs() const
Definition: DeclBase.h:524
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:523
void addAttr(Attr *A)
Definition: DeclBase.cpp:1014
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
Definition: DeclBase.h:1072
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:565
SourceLocation getLocation() const
Definition: DeclBase.h:445
DeclContext * getDeclContext()
Definition: DeclBase.h:454
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: DeclBase.h:437
AttrVec & getAttrs()
Definition: DeclBase.h:530
bool hasAttr() const
Definition: DeclBase.h:583
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:957
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:783
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1547
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition: Diagnostic.h:873
The return type of classify().
Definition: Expr.h:330
This represents one expression.
Definition: Expr.h:110
bool isGLValue() const
Definition: Expr.h:280
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition: Expr.h:671
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition: Expr.h:669
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3075
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3070
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3567
bool isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
QualType getType() const
Definition: Expr.h:142
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition: Expr.cpp:3929
Represents a member of a struct/union/class.
Definition: Decl.h:3030
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:4533
Represents a function declaration or definition.
Definition: Decl.h:1932
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2669
QualType getReturnType() const
Definition: Decl.h:2717
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2646
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:3605
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3680
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5367
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition: Expr.cpp:977
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition: LambdaCapture.h:25
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:539
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:535
virtual void mangleCanonicalTypeName(QualType T, raw_ostream &, bool NormalizeIntegers=false)=0
Generates a unique string for an externally visible type for use with TBAA or type uniquing.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3187
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition: Expr.h:3270
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
bool isExternallyVisible() const
Definition: Decl.h:408
This represents clause 'affinity' in the '#pragma omp task'-based directives.
Class that represents a component of a mappable expression.
ArrayRef< MappableComponent > MappableExprComponentListRef
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
Definition: OpenMPClause.h:219
This is a basic class for representing single OpenMP clause.
Definition: OpenMPClause.h:55
This represents '#pragma omp declare mapper ...' directive.
Definition: DeclOpenMP.h:287
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:177
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:238
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:266
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
Definition: StmtOpenMP.h:446
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
This represents clause 'has_device_ptr' in the '#pragma omp ...' directives.
This represents 'if' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:527
Expr * getCondition() const
Returns condition.
Definition: OpenMPClause.h:596
This represents clause 'in_reduction' in the '#pragma omp task' directives.
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
OpenMP 5.0 [2.1.6 Iterators] Iterators are identifiers that expand to multiple values in the clause o...
Definition: ExprOpenMP.h:151
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents clause 'linear' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....
Definition: StmtOpenMP.h:1004
This represents clause 'map' in the '#pragma omp ...' directives.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:676
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition: DeclOpenMP.h:417
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition: Expr.h:1173
Represents a parameter to a function.
Definition: Decl.h:1722
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3161
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition: Type.h:941
void addRestrict()
Add the restrict qualifier to this QualType.
Definition: Type.h:1179
QualType withRestrict() const
Definition: Type.h:1182
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:1008
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:7743
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:7783
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:7944
QualType getCanonicalType() const
Definition: Type.h:7795
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition: Type.h:1542
Represents a struct/union/class.
Definition: Decl.h:4141
field_iterator field_end() const
Definition: Decl.h:4350
field_range fields() const
Definition: Decl.h:4347
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition: Decl.cpp:5069
bool field_empty() const
Definition: Decl.h:4355
field_iterator field_begin() const
Definition: Decl.cpp:5057
RecordDecl * getDecl() const
Definition: Type.h:5946
decl_type * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
Definition: Redeclarable.h:204
decl_type * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
Definition: Redeclarable.h:226
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:3402
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
fileinfo_iterator fileinfo_end() const
SourceLocation translateFileLineCol(const FileEntry *SourceFile, unsigned Line, unsigned Col) const
Get the source location for the given file:line:col triplet.
fileinfo_iterator fileinfo_begin() const
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition: Stmt.h:84
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low-level value objects created/interpre...
Definition: Stmt.cpp:326
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition: Stmt.cpp:197
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:4725
bool isUnion() const
Definition: Decl.h:3763
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:1576
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Definition: TargetInfo.h:1487
The base class of the type hierarchy.
Definition: Type.h:1829
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1882
bool isVoidType() const
Definition: Type.h:8295
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration type whose underlying ty...
Definition: Type.cpp:2167
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition: Type.h:8473
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2146
bool isArrayType() const
Definition: Type.h:8064
bool isPointerType() const
Definition: Type.h:7996
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8335
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8583
bool isReferenceType() const
Definition: Type.h:8010
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:705
bool isLValueReferenceType() const
Definition: Type.h:8014
QualType getCanonicalTypeInternal() const
Definition: Type.h:2955
const RecordType * getAsStructureType() const
Definition: Type.cpp:721
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:8466
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:2690
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition: Type.h:8569
bool isFloatingType() const
Definition: Type.cpp:2249
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2196
bool isAnyPointerType() const
Definition: Type.h:8000
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8516
bool isRecordType() const
Definition: Type.h:8092
bool isUnionType() const
Definition: Type.cpp:671
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1890
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1886
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:667
QualType getType() const
Definition: Decl.h:678
Represents a variable declaration or definition.
Definition: Decl.h:879
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2239
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2348
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition: Decl.h:1165
bool hasLocalStorage() const
Returns true if a variable with function scope is a non-static local variable.
Definition: Decl.h:1132
@ DeclarationOnly
This declaration is only a declaration.
Definition: Decl.h:1243
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2357
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1210
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition: Decl.h:1306
Represents a C array with a specified size that is not an integer-constant-expression.
Definition: Type.h:3769
Expr * getSizeExpr() const
Definition: Type.h:3788
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:33
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
@ NotKnownNonNull
Definition: Address.h:33
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs a task-based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition: ABI.h:25
if (T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast<Expr *>(T->getSizeExpr())))
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:24
@ ICIS_NoInit
No in-class initializer.
Definition: Specifiers.h:269
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition: Lambda.h:37
BinaryOperatorKind
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:38
@ OMPC_SCHEDULE_MODIFIER_last
Definition: OpenMPKinds.h:43
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition: OpenMPKinds.h:39
@ CR_OpenMP
Definition: CapturedStmt.h:19
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:103
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition: OpenMPKinds.h:54
@ OMPC_DEPEND_unknown
Definition: OpenMPKinds.h:58
@ Dtor_Complete
Complete object dtor.
Definition: ABI.h:35
@ Union
The "union" keyword.
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:132
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:136
const FunctionProtoType * T
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
Definition: OpenMPKinds.h:131
@ OMPC_DEVICE_unknown
Definition: OpenMPKinds.h:50
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition: OpenMPKinds.h:78
@ OMPC_MAP_MODIFIER_unknown
Definition: OpenMPKinds.h:79
@ Other
Other implicit parameter.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:30
@ OMPC_SCHEDULE_unknown
Definition: OpenMPKinds.h:34
@ AS_public
Definition: Specifiers.h:121
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:70
@ OMPC_MAP_unknown
Definition: OpenMPKinds.h:74
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define false
Definition: stdbool.h:26
#define bool
Definition: stdbool.h:24
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
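These values feed the dispatch initializer; a hedged view of the 32-bit entry point they target, with ident_t and the schedule enum opaqued to plain types and the default treated as an assumption:
  extern "C" void __kmpc_dispatch_init_4(void *loc, int gtid, int schedule,
                                         int lb, int ub, int st, int chunk);
  // 'chunk' carries Chunk; when the 'schedule' clause names no chunk size
  // (the nullptr case above), a constant 1 is assumed to be passed instead.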
Maps the expression for the lastprivate variable to the global copy used to store the new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, necessary to generate the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
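To make the field roles concrete, a self-contained model of what a plain static (block) schedule computes per thread; it illustrates the in/out semantics of IL, LB, UB and ST only, not libomp's actual implementation:
  #include <algorithm>
  void static_init_model(int tid, int nthreads, int lb, int ub, int st,
                         int *IL, int *LB, int *UB, int *ST) {
    int trip = (ub - lb) / st + 1;                // iterations in the loop
    int chunk = (trip + nthreads - 1) / nthreads; // iterations per thread
    *LB = lb + tid * chunk * st;                  // this thread's lower bound
    *UB = std::min(*LB + (chunk - 1) * st, ub);   // and upper bound
    *ST = st;                                     // stride is unchanged here
    *IL = (*UB == ub);                            // last-iteration flag
  }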
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
llvm::IntegerType * IntTy
int
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
Extra information about a function prototype.
Definition: Type.h:5058
Helper expressions and declaration for OMPIteratorExpr class for each iteration space.
Definition: ExprOpenMP.h:111
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition: ExprOpenMP.h:121
Expr * Update
Update expression for the originally specified iteration variable, calculated as VD = Begin + Counter...
Definition: ExprOpenMP.h:119
VarDecl * CounterVD
Internal normalized counter.
Definition: ExprOpenMP.h:113
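A hedged model of the expansion these helpers drive for an OpenMP iterator such as 'iterator(i = begin:end:step)'; the names mirror the fields above, while the loop itself is illustrative:
  void expand_iterator(int begin, int step, int num_values) {
    for (int counter = 0; counter < num_values; ++counter) { // CounterVD
      int i = begin + counter * step; // Update: VD = Begin + Counter * Step
      // ... one expanded value of the clause uses 'i' here ...
    } // the loop increment plays the role of CounterUpdate: ++CounterVD
  }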
Data for list of allocators.
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:179
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:182
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:181
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:180
Describes how types, statements, expressions, and declarations should be printed.
Definition: PrettyPrinter.h:57