//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
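
  // Reader's note (an illustrative sketch, not code emitted verbatim): for an
  // untied task whose body is split at two task scheduling points, Enter()
  // plus the emitUntiedSwitch() calls build a dispatcher of roughly this
  // shape:
  //
  //   switch (*part_id) {           // emitted by Enter()
  //   case 0: goto .untied.jmp.0;   // initial entry
  //   case 1: goto .untied.jmp.1;   // resume after first scheduling point
  //   case 2: goto .untied.jmp.2;   // resume after second scheduling point
  //   default: goto .untied.done.;  // nothing left to run
  //   }
  //
  // Each emitUntiedSwitch() stores the next case index into *part_id and runs
  // UntiedCodeGen, which re-enqueues the task via __kmpc_omp_task (see
  // emitTaskOutlinedFunction below).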
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look VD up
    // in the list of captured variables - we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application, so it is provided by the client, because only the
/// client has the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
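
// A minimal usage sketch (hypothetical caller code, for illustration only):
// the RAII object installs the inlined-region CapturedStmtInfo for the
// duration of one statement and restores the previous state on scope exit:
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CGF.EmitStmt(Body); // emitted with the inlined-region info active
//   } // destructor restores the outer CapturedStmtInfo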

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the
/// code from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
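
// For example (following the flag definitions above), the implicit barrier
// at the end of a worksharing 'for' region is described by
// OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR, while an explicit
// '#pragma omp barrier' uses OMP_IDENT_KMPC | OMP_IDENT_BARRIER_EXPL.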

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
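
// As an illustration of the mapping (assuming the usual kmp.h semantics):
// 'schedule(dynamic, 4)' selects OMP_sch_dynamic_chunked, adding the
// 'monotonic' modifier ORs in OMP_sch_modifier_monotonic, and the same
// schedule on an 'ordered' loop uses the OMP_ord_* counterpart, e.g.
// OMP_ord_dynamic_chunked.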

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
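
// The loop emitted above corresponds to this C-like sketch (the optional
// source pointer used for declare-reduction initializers elided):
//
//   if (dest_begin == dest_end) goto omp.arrayinit.done;
//   omp.arrayinit.body:
//     <init element at dest>; ++dest; // and ++src when a DRD is used
//     if (dest != dest_end) goto omp.arrayinit.body;
//   omp.arrayinit.done: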

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
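
// As a concrete example (user code, not part of this file), a user-defined
// reduction such as
//
//   #pragma omp declare reduction(merge : T : omp_out = f(omp_out, omp_in)) \
//       initializer(omp_priv = T())
//
// produces two internal helpers, '.omp_combiner.' and '.omp_initializer.'
// (with platform-specific name mangling via getName), each taking restrict
// pointers to the out/in (resp. priv/orig) values, with the variables
// privatized to the dereferenced parameters as set up above.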

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
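
// E.g. a directive at line 12, column 9 of test.c inside foo() yields the
// ident string ";test.c;foo;12;9;;".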

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
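
// In the non-OpenMPIRBuilder path, the first query in a function emits, at
// the service insertion point, a call of roughly this shape:
//
//   %gtid = call i32 @__kmpc_global_thread_num(ptr @<ident>)
//
// and caches it in OpenMPLocThreadIDMap so that later queries in the same
// function reuse the value.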
1462
1464 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1465 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1467 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1468 }
1469 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1470 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1471 UDRMap.erase(D);
1472 FunctionUDRMap.erase(CGF.CurFn);
1473 }
1474 auto I = FunctionUDMMap.find(CGF.CurFn);
1475 if (I != FunctionUDMMap.end()) {
1476 for(const auto *D : I->second)
1477 UDMMap.erase(D);
1478 FunctionUDMMap.erase(I);
1479 }
1482}
1483
1485 return OMPBuilder.IdentPtr;
1486}
1487
1489 if (!Kmpc_MicroTy) {
1490 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1491 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1492 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1493 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1494 }
1495 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1496}
1497
1498llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1500 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1501 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1502 if (!DevTy)
1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1504
1505 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1506 case OMPDeclareTargetDeclAttr::DT_Host:
1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1508 break;
1509 case OMPDeclareTargetDeclAttr::DT_NoHost:
1510 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1511 break;
1512 case OMPDeclareTargetDeclAttr::DT_Any:
1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1514 break;
1515 default:
1516 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1517 break;
1518 }
1519}
1520
1521llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1523 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1524 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1525 if (!MapType)
1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1527 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1528 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1530 break;
1531 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1532 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1533 break;
1534 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1535 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1536 break;
1537 default:
1538 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1539 break;
1540 }
1541}
1542
1543static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1544 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1545 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1546
1547 auto FileInfoCallBack = [&]() {
1548 SourceManager &SM = CGM.getContext().getSourceManager();
1549 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1550
1551 llvm::sys::fs::UniqueID ID;
1552 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1553 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1554 }
1555
1556 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1557 };
1558
1559 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1560}
1561
1562ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1563 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1564
1565 auto LinkageForVariable = [&VD, this]() {
1566 return CGM.getLLVMLinkageVarDefinition(VD);
1567 };
1568
1569 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1570
1571 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1572 CGM.getContext().getPointerType(VD->getType()));
1573 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1574 convertCaptureClause(VD), convertDeviceClause(VD),
1575 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1576 VD->isExternallyVisible(),
1577 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1578 VD->getCanonicalDecl()->getBeginLoc()),
1579 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1580 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1581 LinkageForVariable);
1582
1583 if (!addr)
1584 return ConstantAddress::invalid();
1585 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1586}
1587
1588llvm::Constant *
1589CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1590 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1591 !CGM.getContext().getTargetInfo().isTLSSupported());
1592 // Lookup the entry, lazily creating it if necessary.
1593 std::string Suffix = getName({"cache", ""});
1594 return OMPBuilder.getOrCreateInternalVariable(
1595 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1596}
1597
1598Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1599 const VarDecl *VD,
1600 Address VDAddr,
1601 SourceLocation Loc) {
1602 if (CGM.getLangOpts().OpenMPUseTLS &&
1603 CGM.getContext().getTargetInfo().isTLSSupported())
1604 return VDAddr;
1605
1606 llvm::Type *VarTy = VDAddr.getElementType();
1607 llvm::Value *Args[] = {
1608 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1609 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1610 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1611 getOrCreateThreadPrivateCache(VD)};
1612 return Address(
1613 CGF.EmitRuntimeCall(
1614 OMPBuilder.getOrCreateRuntimeFunction(
1615 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1616 Args),
1617 CGF.Int8Ty, VDAddr.getAlignment());
1618}
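// As an illustration (editor's sketch, simplified): when TLS cannot be used,
// \code
//   static int x;
//   #pragma omp threadprivate(x)
// \endcode
// turns each use of 'x' into a load through the pointer returned by
// __kmpc_threadprivate_cached(&loc, gtid, &x, sizeof(x), &x$cache).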
1619
1620void CGOpenMPRuntime::emitThreadPrivateVarInit(
1621 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1622 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1623 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1624 // library.
1625 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1626 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1627 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1628 OMPLoc);
1629 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1630 // to register constructor/destructor for variable.
1631 llvm::Value *Args[] = {
1632 OMPLoc,
1633 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1634 Ctor, CopyCtor, Dtor};
1635 CGF.EmitRuntimeCall(
1636 OMPBuilder.getOrCreateRuntimeFunction(
1637 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1638 Args);
1639}
1640
1641llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1642 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1643 bool PerformInit, CodeGenFunction *CGF) {
1644 if (CGM.getLangOpts().OpenMPUseTLS &&
1645 CGM.getContext().getTargetInfo().isTLSSupported())
1646 return nullptr;
1647
1648 VD = VD->getDefinition(CGM.getContext());
1649 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1650 QualType ASTTy = VD->getType();
1651
1652 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1653 const Expr *Init = VD->getAnyInitializer();
1654 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1655 // Generate function that re-emits the declaration's initializer into the
1656 // threadprivate copy of the variable VD
1657 CodeGenFunction CtorCGF(CGM);
1658 FunctionArgList Args;
1659 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1660 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1661 ImplicitParamKind::Other);
1662 Args.push_back(&Dst);
1663
1664 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1665 CGM.getContext().VoidPtrTy, Args);
1666 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1667 std::string Name = getName({"__kmpc_global_ctor_", ""});
1668 llvm::Function *Fn =
1669 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1670 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1671 Args, Loc, Loc);
1672 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1673 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1674 CGM.getContext().VoidPtrTy, Dst.getLocation());
1675 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1676 VDAddr.getAlignment());
1677 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1678 /*IsInitializer=*/true);
1679 ArgVal = CtorCGF.EmitLoadOfScalar(
1680 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1681 CGM.getContext().VoidPtrTy, Dst.getLocation());
1682 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1683 CtorCGF.FinishFunction();
1684 Ctor = Fn;
1685 }
1686 if (VD->getType().isDestructedType() != QualType::DK_none) {
1687 // Generate function that emits destructor call for the threadprivate copy
1688 // of the variable VD
1689 CodeGenFunction DtorCGF(CGM);
1690 FunctionArgList Args;
1691 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1692 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1693 ImplicitParamKind::Other);
1694 Args.push_back(&Dst);
1695
1696 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1697 CGM.getContext().VoidTy, Args);
1698 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1699 std::string Name = getName({"__kmpc_global_dtor_", ""});
1700 llvm::Function *Fn =
1701 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1702 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1703 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1704 Loc, Loc);
1705 // Create a scope with an artificial location for the body of this function.
1706 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1707 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1708 DtorCGF.GetAddrOfLocalVar(&Dst),
1709 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1710 DtorCGF.emitDestroy(
1711 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1712 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1713 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1714 DtorCGF.FinishFunction();
1715 Dtor = Fn;
1716 }
1717 // Do not emit init function if it is not required.
1718 if (!Ctor && !Dtor)
1719 return nullptr;
1720
1721 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1722 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1723 /*isVarArg=*/false)
1724 ->getPointerTo();
1725 // Copying constructor for the threadprivate variable.
1726 // Must be NULL - reserved by runtime, but currently it requires that this
1727 // parameter is always NULL. Otherwise it fires assertion.
1728 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1729 if (Ctor == nullptr) {
1730 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1731 /*isVarArg=*/false)
1732 ->getPointerTo();
1733 Ctor = llvm::Constant::getNullValue(CtorTy);
1734 }
1735 if (Dtor == nullptr) {
1736 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1737 /*isVarArg=*/false)
1738 ->getPointerTo();
1739 Dtor = llvm::Constant::getNullValue(DtorTy);
1740 }
1741 if (!CGF) {
1742 auto *InitFunctionTy =
1743 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1744 std::string Name = getName({"__omp_threadprivate_init_", ""});
1745 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1746 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1747 CodeGenFunction InitCGF(CGM);
1748 FunctionArgList ArgList;
1749 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1750 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1751 Loc, Loc);
1752 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1753 InitCGF.FinishFunction();
1754 return InitFunction;
1755 }
1756 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1757 }
1758 return nullptr;
1759}
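// Roughly (editor's sketch, assuming a C++ type T with a nontrivial
// constructor and destructor), the helpers generated above behave like:
// \code
//   void *__kmpc_global_ctor_(void *p) { new (p) T(/*Init*/); return p; }
//   void __kmpc_global_dtor_(void *p) { static_cast<T *>(p)->~T(); }
//   // registered via __kmpc_threadprivate_register(&loc, &var, ctor, 0, dtor)
// \endcode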
1760
1761void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1762 llvm::GlobalValue *GV) {
1763 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1764 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1765
1766 // We only need to handle active 'indirect' declare target functions.
1767 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1768 return;
1769
1770 // Get a mangled name to store the new device global in.
1771 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1772 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1773 SmallString<128> Name;
1774 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1775
1776 // We need to generate a new global to hold the address of the indirectly
1777 // called device function. Doing this allows us to keep the visibility and
1778 // linkage of the associated function unchanged while allowing the runtime to
1779 // access its value.
1780 llvm::GlobalValue *Addr = GV;
1781 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1782 Addr = new llvm::GlobalVariable(
1783 CGM.getModule(), CGM.VoidPtrTy,
1784 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1785 nullptr, llvm::GlobalValue::NotThreadLocal,
1786 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1787 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1788 }
1789
1790 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1791 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1792 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1793 llvm::GlobalValue::WeakODRLinkage);
1794}
1795
1796Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1797 QualType VarType,
1798 StringRef Name) {
1799 std::string Suffix = getName({"artificial", ""});
1800 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1801 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1802 VarLVType, Twine(Name).concat(Suffix).str());
1803 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1804 CGM.getContext().getTargetInfo().isTLSSupported()) {
1805 GAddr->setThreadLocal(/*Val=*/true);
1806 return Address(GAddr, GAddr->getValueType(),
1807 CGM.getContext().getTypeAlignInChars(VarType));
1808 }
1809 std::string CacheSuffix = getName({"cache", ""});
1810 llvm::Value *Args[] = {
1811 emitUpdateLocation(CGF, SourceLocation()),
1812 getThreadID(CGF, SourceLocation()),
1813 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1814 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1815 /*isSigned=*/false),
1816 OMPBuilder.getOrCreateInternalVariable(
1817 CGM.VoidPtrPtrTy,
1818 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1819 return Address(
1820 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1821 CGF.EmitRuntimeCall(
1822 OMPBuilder.getOrCreateRuntimeFunction(
1823 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1824 Args),
1825 VarLVType->getPointerTo(/*AddrSpace=*/0)),
1826 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1827}
1828
1829void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1830 const RegionCodeGenTy &ThenGen,
1831 const RegionCodeGenTy &ElseGen) {
1832 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1833
1834 // If the condition constant folds and can be elided, try to avoid emitting
1835 // the condition and the dead arm of the if/else.
1836 bool CondConstant;
1837 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1838 if (CondConstant)
1839 ThenGen(CGF);
1840 else
1841 ElseGen(CGF);
1842 return;
1843 }
1844
1845 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1846 // emit the conditional branch.
1847 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1848 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1849 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1850 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1851
1852 // Emit the 'then' code.
1853 CGF.EmitBlock(ThenBlock);
1854 ThenGen(CGF);
1855 CGF.EmitBranch(ContBlock);
1856 // Emit the 'else' code if present.
1857 // There is no need to emit line number for unconditional branch.
1858 (void)ApplyDebugLocation::CreateEmpty(CGF);
1859 CGF.EmitBlock(ElseBlock);
1860 ElseGen(CGF);
1861 // There is no need to emit line number for unconditional branch.
1862 (void)ApplyDebugLocation::CreateEmpty(CGF);
1863 CGF.EmitBranch(ContBlock);
1864 // Emit the continuation block for code after the if.
1865 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1866}
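// For a non-constant condition this yields the usual three-block diamond
// (editor's sketch of the resulting CFG):
// \code
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// omp_if.then:   ; ThenGen body
//   br label %omp_if.end
// omp_if.else:   ; ElseGen body
//   br label %omp_if.end
// omp_if.end:
// \endcode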
1867
1868void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
1869 llvm::Function *OutlinedFn,
1870 ArrayRef<llvm::Value *> CapturedVars,
1871 const Expr *IfCond,
1872 llvm::Value *NumThreads) {
1873 if (!CGF.HaveInsertPoint())
1874 return;
1875 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1876 auto &M = CGM.getModule();
1877 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1878 this](CodeGenFunction &CGF, PrePostActionTy &) {
1879 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1880 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1881 llvm::Value *Args[] = {
1882 RTLoc,
1883 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1884 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1885 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1886 RealArgs.append(std::begin(Args), std::end(Args));
1887 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1888
1889 llvm::FunctionCallee RTLFn =
1890 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1891 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1892 };
1893 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1894 this](CodeGenFunction &CGF, PrePostActionTy &) {
1895 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1896 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1897 // Build calls:
1898 // __kmpc_serialized_parallel(&Loc, GTid);
1899 llvm::Value *Args[] = {RTLoc, ThreadID};
1900 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1901 M, OMPRTL___kmpc_serialized_parallel),
1902 Args);
1903
1904 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1905 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1906 RawAddress ZeroAddrBound =
1907 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1908 /*Name=*/".bound.zero.addr");
1909 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1910 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1911 // ThreadId for serialized parallels is 0.
1913 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1914 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1915
1916 // Ensure we do not inline the function. This is trivially true for the ones
1917 // passed to __kmpc_fork_call but the ones called in serialized regions
1918 // could be inlined. This is not perfect, but it is closer to the invariant
1919 // we want, namely, every data environment starts with a new function.
1920 // TODO: We should pass the if condition to the runtime function and do the
1921 // handling there. Much cleaner code.
1922 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1923 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1924 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1925
1926 // __kmpc_end_serialized_parallel(&Loc, GTid);
1927 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1928 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1929 M, OMPRTL___kmpc_end_serialized_parallel),
1930 EndArgs);
1931 };
1932 if (IfCond) {
1933 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1934 } else {
1935 RegionCodeGenTy ThenRCG(ThenGen);
1936 ThenRCG(CGF);
1937 }
1938}
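// Editor's sketch of the two lowerings selected above (simplified):
// \code
//   // 'if' clause true (or absent) - parallel path:
//   __kmpc_fork_call(&loc, n, (kmpc_micro)outlined, var1, ..., varn);
//   // 'if' clause false - serialized path:
//   __kmpc_serialized_parallel(&loc, gtid);
//   outlined(&gtid, &.bound.zero.addr, var1, ..., varn);
//   __kmpc_end_serialized_parallel(&loc, gtid);
// \endcode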
1939
1940// If we're inside an (outlined) parallel region, use the region info's
1941// thread-ID variable (it is passed as the first argument of the outlined
1942// function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1943// region but in a regular serial code region, get the thread ID by calling
1944// kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1945// temporary and return the address of that temp.
1946Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1947 SourceLocation Loc) {
1948 if (auto *OMPRegionInfo =
1949 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1950 if (OMPRegionInfo->getThreadIDVariable())
1951 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1952
1953 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1954 QualType Int32Ty =
1955 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1956 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1957 CGF.EmitStoreOfScalar(ThreadID,
1958 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1959
1960 return ThreadIDTemp;
1961}
1962
1963llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1964 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1965 std::string Name = getName({Prefix, "var"});
1966 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1967}
1968
1969namespace {
1970/// Common pre(post)-action for different OpenMP constructs.
1971class CommonActionTy final : public PrePostActionTy {
1972 llvm::FunctionCallee EnterCallee;
1973 ArrayRef<llvm::Value *> EnterArgs;
1974 llvm::FunctionCallee ExitCallee;
1975 ArrayRef<llvm::Value *> ExitArgs;
1976 bool Conditional;
1977 llvm::BasicBlock *ContBlock = nullptr;
1978
1979public:
1980 CommonActionTy(llvm::FunctionCallee EnterCallee,
1981 ArrayRef<llvm::Value *> EnterArgs,
1982 llvm::FunctionCallee ExitCallee,
1983 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1984 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1985 ExitArgs(ExitArgs), Conditional(Conditional) {}
1986 void Enter(CodeGenFunction &CGF) override {
1987 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1988 if (Conditional) {
1989 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1990 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1991 ContBlock = CGF.createBasicBlock("omp_if.end");
1992 // Generate the branch (If-stmt)
1993 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1994 CGF.EmitBlock(ThenBlock);
1995 }
1996 }
1997 void Done(CodeGenFunction &CGF) {
1998 // Emit the rest of blocks/branches
1999 CGF.EmitBranch(ContBlock);
2000 CGF.EmitBlock(ContBlock, true);
2001 }
2002 void Exit(CodeGenFunction &CGF) override {
2003 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2004 }
2005};
2006} // anonymous namespace
2007
2008void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2009 StringRef CriticalName,
2010 const RegionCodeGenTy &CriticalOpGen,
2011 SourceLocation Loc, const Expr *Hint) {
2012 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2013 // CriticalOpGen();
2014 // __kmpc_end_critical(ident_t *, gtid, Lock);
2015 // Prepare arguments and build a call to __kmpc_critical
2016 if (!CGF.HaveInsertPoint())
2017 return;
2018 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2019 getCriticalRegionLock(CriticalName)};
2020 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2021 std::end(Args));
2022 if (Hint) {
2023 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2024 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2025 }
2026 CommonActionTy Action(
2027 OMPBuilder.getOrCreateRuntimeFunction(
2028 CGM.getModule(),
2029 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2030 EnterArgs,
2031 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2032 OMPRTL___kmpc_end_critical),
2033 Args);
2034 CriticalOpGen.setAction(Action);
2035 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2036}
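// E.g. (editor's sketch) '#pragma omp critical (name)' lowers to:
// \code
//   __kmpc_critical(&loc, gtid, &.gomp_critical_user_name.var);
//   // ... critical region body ...
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_name.var);
// \endcode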
2037
2038void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2039 const RegionCodeGenTy &MasterOpGen,
2040 SourceLocation Loc) {
2041 if (!CGF.HaveInsertPoint())
2042 return;
2043 // if(__kmpc_master(ident_t *, gtid)) {
2044 // MasterOpGen();
2045 // __kmpc_end_master(ident_t *, gtid);
2046 // }
2047 // Prepare arguments and build a call to __kmpc_master
2048 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2049 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2050 CGM.getModule(), OMPRTL___kmpc_master),
2051 Args,
2052 OMPBuilder.getOrCreateRuntimeFunction(
2053 CGM.getModule(), OMPRTL___kmpc_end_master),
2054 Args,
2055 /*Conditional=*/true);
2056 MasterOpGen.setAction(Action);
2057 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2058 Action.Done(CGF);
2059}
2060
2061void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2062 const RegionCodeGenTy &MaskedOpGen,
2063 SourceLocation Loc, const Expr *Filter) {
2064 if (!CGF.HaveInsertPoint())
2065 return;
2066 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2067 // MaskedOpGen();
2068 // __kmpc_end_masked(ident_t *, gtid);
2069 // }
2070 // Prepare arguments and build a call to __kmpc_masked
2071 llvm::Value *FilterVal = Filter
2072 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2073 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2074 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2075 FilterVal};
2076 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2077 getThreadID(CGF, Loc)};
2078 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2079 CGM.getModule(), OMPRTL___kmpc_masked),
2080 Args,
2081 OMPBuilder.getOrCreateRuntimeFunction(
2082 CGM.getModule(), OMPRTL___kmpc_end_masked),
2083 ArgsEnd,
2084 /*Conditional=*/true);
2085 MaskedOpGen.setAction(Action);
2086 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2087 Action.Done(CGF);
2088}
2089
2090void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2091 SourceLocation Loc) {
2092 if (!CGF.HaveInsertPoint())
2093 return;
2094 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2095 OMPBuilder.createTaskyield(CGF.Builder);
2096 } else {
2097 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2098 llvm::Value *Args[] = {
2099 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2100 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2101 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2102 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2103 Args);
2104 }
2105
2106 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2107 Region->emitUntiedSwitch(CGF);
2108}
2109
2110void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2111 const RegionCodeGenTy &TaskgroupOpGen,
2112 SourceLocation Loc) {
2113 if (!CGF.HaveInsertPoint())
2114 return;
2115 // __kmpc_taskgroup(ident_t *, gtid);
2116 // TaskgroupOpGen();
2117 // __kmpc_end_taskgroup(ident_t *, gtid);
2118 // Prepare arguments and build a call to __kmpc_taskgroup
2119 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2120 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2121 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2122 Args,
2123 OMPBuilder.getOrCreateRuntimeFunction(
2124 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2125 Args);
2126 TaskgroupOpGen.setAction(Action);
2127 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2128}
2129
2130/// Given an array of pointers to variables, project the address of a
2131/// given variable.
2132static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2133 unsigned Index, const VarDecl *Var) {
2134 // Pull out the pointer to the variable.
2135 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2136 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2137
2138 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2139 return Address(
2140 CGF.Builder.CreateBitCast(
2141 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2142 ElemTy, CGF.getContext().getDeclAlign(Var));
2143}
2144
2145static llvm::Function *emitCopyprivateCopyFunction(
2146 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2147 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2148 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2149 SourceLocation Loc) {
2150 ASTContext &C = CGM.getContext();
2151 // void copy_func(void *LHSArg, void *RHSArg);
2152 FunctionArgList Args;
2153 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2154 ImplicitParamKind::Other);
2155 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2156 ImplicitParamKind::Other);
2157 Args.push_back(&LHSArg);
2158 Args.push_back(&RHSArg);
2159 const auto &CGFI =
2160 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2161 std::string Name =
2162 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2163 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2164 llvm::GlobalValue::InternalLinkage, Name,
2165 &CGM.getModule());
2166 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2167 Fn->setDoesNotRecurse();
2168 CodeGenFunction CGF(CGM);
2169 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2170 // Dest = (void*[n])(LHSArg);
2171 // Src = (void*[n])(RHSArg);
2172 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2173 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2174 ArgsElemType->getPointerTo()),
2175 ArgsElemType, CGF.getPointerAlign());
2176 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2177 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2178 ArgsElemType->getPointerTo()),
2179 ArgsElemType, CGF.getPointerAlign());
2180 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2181 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2182 // ...
2183 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2184 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2185 const auto *DestVar =
2186 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2187 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2188
2189 const auto *SrcVar =
2190 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2191 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2192
2193 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2194 QualType Type = VD->getType();
2195 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2196 }
2197 CGF.FinishFunction();
2198 return Fn;
2199}
2200
2201void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2202 const RegionCodeGenTy &SingleOpGen,
2203 SourceLocation Loc,
2204 ArrayRef<const Expr *> CopyprivateVars,
2205 ArrayRef<const Expr *> SrcExprs,
2206 ArrayRef<const Expr *> DstExprs,
2207 ArrayRef<const Expr *> AssignmentOps) {
2208 if (!CGF.HaveInsertPoint())
2209 return;
2210 assert(CopyprivateVars.size() == SrcExprs.size() &&
2211 CopyprivateVars.size() == DstExprs.size() &&
2212 CopyprivateVars.size() == AssignmentOps.size());
2213 ASTContext &C = CGM.getContext();
2214 // int32 did_it = 0;
2215 // if(__kmpc_single(ident_t *, gtid)) {
2216 // SingleOpGen();
2217 // __kmpc_end_single(ident_t *, gtid);
2218 // did_it = 1;
2219 // }
2220 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2221 // <copy_func>, did_it);
2222
2223 Address DidIt = Address::invalid();
2224 if (!CopyprivateVars.empty()) {
2225 // int32 did_it = 0;
2226 QualType KmpInt32Ty =
2227 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2228 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2229 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2230 }
2231 // Prepare arguments and build a call to __kmpc_single
2232 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2233 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2234 CGM.getModule(), OMPRTL___kmpc_single),
2235 Args,
2236 OMPBuilder.getOrCreateRuntimeFunction(
2237 CGM.getModule(), OMPRTL___kmpc_end_single),
2238 Args,
2239 /*Conditional=*/true);
2240 SingleOpGen.setAction(Action);
2241 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2242 if (DidIt.isValid()) {
2243 // did_it = 1;
2244 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2245 }
2246 Action.Done(CGF);
2247 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2248 // <copy_func>, did_it);
2249 if (DidIt.isValid()) {
2250 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2251 QualType CopyprivateArrayTy = C.getConstantArrayType(
2252 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2253 /*IndexTypeQuals=*/0);
2254 // Create a list of all private variables for copyprivate.
2255 Address CopyprivateList =
2256 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2257 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2258 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2259 CGF.Builder.CreateStore(
2260 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2261 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2262 CGF.VoidPtrTy),
2263 Elem);
2264 }
2265 // Build function that copies private values from single region to all other
2266 // threads in the corresponding parallel region.
2267 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2268 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2269 SrcExprs, DstExprs, AssignmentOps, Loc);
2270 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2271 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2272 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2273 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2274 llvm::Value *Args[] = {
2275 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2276 getThreadID(CGF, Loc), // i32 <gtid>
2277 BufSize, // size_t <buf_size>
2278 CL.emitRawPointer(CGF), // void *<copyprivate list>
2279 CpyFn, // void (*) (void *, void *) <copy_func>
2280 DidItVal // i32 did_it
2281 };
2282 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2283 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2284 Args);
2285 }
2286}
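// Editor's sketch of the overall shape for
// '#pragma omp single copyprivate(x)':
// \code
//   kmp_int32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     /* body */ __kmpc_end_single(&loc, gtid); did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, buf_size, cpr_list, copy_func, did_it);
// \endcode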
2287
2288void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2289 const RegionCodeGenTy &OrderedOpGen,
2290 SourceLocation Loc, bool IsThreads) {
2291 if (!CGF.HaveInsertPoint())
2292 return;
2293 // __kmpc_ordered(ident_t *, gtid);
2294 // OrderedOpGen();
2295 // __kmpc_end_ordered(ident_t *, gtid);
2296 // Prepare arguments and build a call to __kmpc_ordered
2297 if (IsThreads) {
2298 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2299 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2300 CGM.getModule(), OMPRTL___kmpc_ordered),
2301 Args,
2302 OMPBuilder.getOrCreateRuntimeFunction(
2303 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2304 Args);
2305 OrderedOpGen.setAction(Action);
2306 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2307 return;
2308 }
2309 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2310}
2311
2312static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2313 unsigned Flags;
2314 if (Kind == OMPD_for)
2315 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2316 else if (Kind == OMPD_sections)
2317 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2318 else if (Kind == OMPD_single)
2319 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2320 else if (Kind == OMPD_barrier)
2321 Flags = OMP_IDENT_BARRIER_EXPL;
2322 else
2323 Flags = OMP_IDENT_BARRIER_IMPL;
2324 return Flags;
2325}
2326
2327void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2328 CodeGenFunction &CGF, const OMPLoopDirective &S,
2329 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2330 // Check if the loop directive is actually a doacross loop directive. In this
2331 // case choose static, 1 schedule.
2332 if (llvm::any_of(
2333 S.getClausesOfKind<OMPOrderedClause>(),
2334 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2335 ScheduleKind = OMPC_SCHEDULE_static;
2336 // Chunk size is 1 in this case.
2337 llvm::APInt ChunkSize(32, 1);
2338 ChunkExpr = IntegerLiteral::Create(
2339 CGF.getContext(), ChunkSize,
2340 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2341 SourceLocation());
2342 }
2343}
2344
2345void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2346 OpenMPDirectiveKind Kind, bool EmitChecks,
2347 bool ForceSimpleCall) {
2348 // Check if we should use the OMPBuilder
2349 auto *OMPRegionInfo =
2350 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2351 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2352 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2353 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2354 return;
2355 }
2356
2357 if (!CGF.HaveInsertPoint())
2358 return;
2359 // Build call __kmpc_cancel_barrier(loc, thread_id);
2360 // Build call __kmpc_barrier(loc, thread_id);
2361 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2362 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2363 // thread_id);
2364 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2365 getThreadID(CGF, Loc)};
2366 if (OMPRegionInfo) {
2367 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2368 llvm::Value *Result = CGF.EmitRuntimeCall(
2369 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2370 OMPRTL___kmpc_cancel_barrier),
2371 Args);
2372 if (EmitChecks) {
2373 // if (__kmpc_cancel_barrier()) {
2374 // exit from construct;
2375 // }
2376 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2377 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2378 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2379 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2380 CGF.EmitBlock(ExitBB);
2381 // exit from construct;
2382 CodeGenFunction::JumpDest CancelDestination =
2383 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2384 CGF.EmitBranchThroughCleanup(CancelDestination);
2385 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2386 }
2387 return;
2388 }
2389 }
2390 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2391 CGM.getModule(), OMPRTL___kmpc_barrier),
2392 Args);
2393}
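// Editor's sketch of the cancellation-aware barrier emitted above:
// \code
//   if (__kmpc_cancel_barrier(&loc, gtid)) {
//     // branch through cleanups to the construct's cancellation exit
//   }
//   // plain barriers reduce to: __kmpc_barrier(&loc, gtid);
// \endcode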
2394
2395void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2396 Expr *ME, bool IsFatal) {
2397 llvm::Value *MVL =
2398 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2399 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2400 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2401 // *message)
2402 llvm::Value *Args[] = {
2403 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2404 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2405 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2406 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2407 CGM.getModule(), OMPRTL___kmpc_error),
2408 Args);
2409}
2410
2411/// Map the OpenMP loop schedule to the runtime enumeration.
2412static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2413 bool Chunked, bool Ordered) {
2414 switch (ScheduleKind) {
2415 case OMPC_SCHEDULE_static:
2416 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2417 : (Ordered ? OMP_ord_static : OMP_sch_static);
2418 case OMPC_SCHEDULE_dynamic:
2419 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2420 case OMPC_SCHEDULE_guided:
2421 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2422 case OMPC_SCHEDULE_runtime:
2423 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2424 case OMPC_SCHEDULE_auto:
2425 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2426 case OMPC_SCHEDULE_unknown:
2427 assert(!Chunked && "chunk was specified but schedule kind not known");
2428 return Ordered ? OMP_ord_static : OMP_sch_static;
2429 }
2430 llvm_unreachable("Unexpected runtime schedule");
2431}
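// For instance (editor's illustration):
// \code
//   #pragma omp for schedule(dynamic, 4)      // -> OMP_sch_dynamic_chunked
//   #pragma omp for ordered schedule(static)  // -> OMP_ord_static
// \endcode
// The chunk value itself is passed to the runtime separately.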
2432
2433/// Map the OpenMP distribute schedule to the runtime enumeration.
2434static OpenMPSchedType
2435getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2436 // only static is allowed for dist_schedule
2437 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2438}
2439
2440bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2441 bool Chunked) const {
2442 OpenMPSchedType Schedule =
2443 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2444 return Schedule == OMP_sch_static;
2445}
2446
2447bool CGOpenMPRuntime::isStaticNonchunked(
2448 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2449 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2450 return Schedule == OMP_dist_sch_static;
2451}
2452
2453bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2454 bool Chunked) const {
2455 OpenMPSchedType Schedule =
2456 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2457 return Schedule == OMP_sch_static_chunked;
2458}
2459
2460bool CGOpenMPRuntime::isStaticChunked(
2461 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2462 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2463 return Schedule == OMP_dist_sch_static_chunked;
2464}
2465
2466bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2467 OpenMPSchedType Schedule =
2468 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2469 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2470 return Schedule != OMP_sch_static;
2471}
2472
2473static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2474 OpenMPScheduleClauseModifier M1,
2475 OpenMPScheduleClauseModifier M2) {
2476 int Modifier = 0;
2477 switch (M1) {
2478 case OMPC_SCHEDULE_MODIFIER_monotonic:
2479 Modifier = OMP_sch_modifier_monotonic;
2480 break;
2481 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2482 Modifier = OMP_sch_modifier_nonmonotonic;
2483 break;
2484 case OMPC_SCHEDULE_MODIFIER_simd:
2485 if (Schedule == OMP_sch_static_chunked)
2486 Schedule = OMP_sch_static_balanced_chunked;
2487 break;
2488 case OMPC_SCHEDULE_MODIFIER_last:
2489 case OMPC_SCHEDULE_MODIFIER_unknown:
2490 break;
2491 }
2492 switch (M2) {
2493 case OMPC_SCHEDULE_MODIFIER_monotonic:
2494 Modifier = OMP_sch_modifier_monotonic;
2495 break;
2496 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2497 Modifier = OMP_sch_modifier_nonmonotonic;
2498 break;
2499 case OMPC_SCHEDULE_MODIFIER_simd:
2500 if (Schedule == OMP_sch_static_chunked)
2501 Schedule = OMP_sch_static_balanced_chunked;
2502 break;
2503 case OMPC_SCHEDULE_MODIFIER_last:
2504 case OMPC_SCHEDULE_MODIFIER_unknown:
2505 break;
2506 }
2507 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2508 // If the static schedule kind is specified or if the ordered clause is
2509 // specified, and if the nonmonotonic modifier is not specified, the effect is
2510 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2511 // modifier is specified, the effect is as if the nonmonotonic modifier is
2512 // specified.
2513 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2514 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2515 Schedule == OMP_sch_static_balanced_chunked ||
2516 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2517 Schedule == OMP_dist_sch_static_chunked ||
2518 Schedule == OMP_dist_sch_static))
2519 Modifier = OMP_sch_modifier_nonmonotonic;
2520 }
2521 return Schedule | Modifier;
2522}
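// The modifier bits are OR'ed into the schedule value handed to the runtime
// (editor's illustration):
// \code
//   // schedule(nonmonotonic: dynamic, 4) ->
//   //   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// \endcode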
2523
2524void CGOpenMPRuntime::emitForDispatchInit(
2525 CodeGenFunction &CGF, SourceLocation Loc,
2526 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2527 bool Ordered, const DispatchRTInput &DispatchValues) {
2528 if (!CGF.HaveInsertPoint())
2529 return;
2530 OpenMPSchedType Schedule = getRuntimeSchedule(
2531 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2532 assert(Ordered ||
2533 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2534 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2535 Schedule != OMP_sch_static_balanced_chunked));
2536 // Call __kmpc_dispatch_init(
2537 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2538 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2539 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2540
2541 // If the Chunk was not specified in the clause - use default value 1.
2542 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2543 : CGF.Builder.getIntN(IVSize, 1);
2544 llvm::Value *Args[] = {
2545 emitUpdateLocation(CGF, Loc),
2546 getThreadID(CGF, Loc),
2547 CGF.Builder.getInt32(addMonoNonMonoModifier(
2548 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2549 DispatchValues.LB, // Lower
2550 DispatchValues.UB, // Upper
2551 CGF.Builder.getIntN(IVSize, 1), // Stride
2552 Chunk // Chunk
2553 };
2554 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2555 Args);
2556}
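// Editor's sketch of the dispatch protocol this call starts; the chunks are
// then fetched with __kmpc_dispatch_next (see emitForNext below):
// \code
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, /*stride=*/1, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &st))
//     for (i = lo; i <= hi; i += st) { /* loop body */ }
// \endcode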
2557
2558void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2559 SourceLocation Loc) {
2560 if (!CGF.HaveInsertPoint())
2561 return;
2562 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2563 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2564 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2565}
2566
2567static void emitForStaticInitCall(
2568 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2569 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2570 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2571 const CGOpenMPRuntime::StaticRTInput &Values) {
2572 if (!CGF.HaveInsertPoint())
2573 return;
2574
2575 assert(!Values.Ordered);
2576 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2577 Schedule == OMP_sch_static_balanced_chunked ||
2578 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2579 Schedule == OMP_dist_sch_static ||
2580 Schedule == OMP_dist_sch_static_chunked);
2581
2582 // Call __kmpc_for_static_init(
2583 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2584 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2585 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2586 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2587 llvm::Value *Chunk = Values.Chunk;
2588 if (Chunk == nullptr) {
2589 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2590 Schedule == OMP_dist_sch_static) &&
2591 "expected static non-chunked schedule");
2592 // If the Chunk was not specified in the clause - use default value 1.
2593 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2594 } else {
2595 assert((Schedule == OMP_sch_static_chunked ||
2596 Schedule == OMP_sch_static_balanced_chunked ||
2597 Schedule == OMP_ord_static_chunked ||
2598 Schedule == OMP_dist_sch_static_chunked) &&
2599 "expected static chunked schedule");
2600 }
2601 llvm::Value *Args[] = {
2602 UpdateLocation,
2603 ThreadId,
2604 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2605 M2)), // Schedule type
2606 Values.IL.emitRawPointer(CGF), // &isLastIter
2607 Values.LB.emitRawPointer(CGF), // &LB
2608 Values.UB.emitRawPointer(CGF), // &UB
2609 Values.ST.emitRawPointer(CGF), // &Stride
2610 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2611 Chunk // Chunk
2612 };
2613 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2614}
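// Editor's sketch of how a statically scheduled worksharing loop uses this:
// \code
//   __kmpc_for_static_init_4(&loc, gtid, sched, &last, &lb, &ub, &st, 1, chunk);
//   for (i = lb; i <= ub; i += st) { /* loop body */ }  // bounds pre-clamped
//   __kmpc_for_static_fini(&loc, gtid);
// \endcode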
2615
2616void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2617 SourceLocation Loc,
2618 OpenMPDirectiveKind DKind,
2619 const OpenMPScheduleTy &ScheduleKind,
2620 const StaticRTInput &Values) {
2621 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2622 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2623 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2624 "Expected loop-based or sections-based directive.");
2625 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2626 isOpenMPLoopDirective(DKind)
2627 ? OMP_IDENT_WORK_LOOP
2628 : OMP_IDENT_WORK_SECTIONS);
2629 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2630 llvm::FunctionCallee StaticInitFunction =
2631 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2632 false);
2634 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2635 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2636}
2637
2638void CGOpenMPRuntime::emitDistributeStaticInit(
2639 CodeGenFunction &CGF, SourceLocation Loc,
2640 OpenMPDistScheduleClauseKind SchedKind,
2641 const CGOpenMPRuntime::StaticRTInput &Values) {
2642 OpenMPSchedType ScheduleNum =
2643 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2644 llvm::Value *UpdatedLocation =
2645 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2646 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2647 llvm::FunctionCallee StaticInitFunction;
2648 bool isGPUDistribute =
2649 CGM.getLangOpts().OpenMPIsTargetDevice &&
2650 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2651 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2652 Values.IVSize, Values.IVSigned, isGPUDistribute);
2653
2654 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2655 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2656 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2657}
2658
2659void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2660 SourceLocation Loc,
2661 OpenMPDirectiveKind DKind) {
2662 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2663 DKind == OMPD_sections) &&
2664 "Expected distribute, for, or sections directive kind");
2665 if (!CGF.HaveInsertPoint())
2666 return;
2667 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2668 llvm::Value *Args[] = {
2669 emitUpdateLocation(CGF, Loc,
2670 isOpenMPDistributeDirective(DKind) ||
2671 (DKind == OMPD_target_teams_loop)
2672 ? OMP_IDENT_WORK_DISTRIBUTE
2673 : isOpenMPLoopDirective(DKind)
2674 ? OMP_IDENT_WORK_LOOP
2675 : OMP_IDENT_WORK_SECTIONS),
2676 getThreadID(CGF, Loc)};
2677 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2678 if (isOpenMPDistributeDirective(DKind) &&
2679 CGM.getLangOpts().OpenMPIsTargetDevice &&
2680 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2681 CGF.EmitRuntimeCall(
2682 OMPBuilder.getOrCreateRuntimeFunction(
2683 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2684 Args);
2685 else
2686 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2687 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2688 Args);
2689}
2690
2691void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2692 SourceLocation Loc,
2693 unsigned IVSize,
2694 bool IVSigned) {
2695 if (!CGF.HaveInsertPoint())
2696 return;
2697 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2698 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2699 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2700 Args);
2701}
2702
2703llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2704 SourceLocation Loc, unsigned IVSize,
2705 bool IVSigned, Address IL,
2706 Address LB, Address UB,
2707 Address ST) {
2708 // Call __kmpc_dispatch_next(
2709 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2710 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2711 // kmp_int[32|64] *p_stride);
2712 llvm::Value *Args[] = {
2713 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2714 IL.emitRawPointer(CGF), // &isLastIter
2715 LB.emitRawPointer(CGF), // &Lower
2716 UB.emitRawPointer(CGF), // &Upper
2717 ST.emitRawPointer(CGF) // &Stride
2718 };
2719 llvm::Value *Call = CGF.EmitRuntimeCall(
2720 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2721 return CGF.EmitScalarConversion(
2722 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2723 CGF.getContext().BoolTy, Loc);
2724}
2725
2726void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2727 llvm::Value *NumThreads,
2728 SourceLocation Loc) {
2729 if (!CGF.HaveInsertPoint())
2730 return;
2731 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2732 llvm::Value *Args[] = {
2733 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2734 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2735 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2736 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2737 Args);
2738}
2739
2740void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2741 ProcBindKind ProcBind,
2742 SourceLocation Loc) {
2743 if (!CGF.HaveInsertPoint())
2744 return;
2745 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2746 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2747 llvm::Value *Args[] = {
2748 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2749 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2750 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2751 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2752 Args);
2753}
2754
2755void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2756 SourceLocation Loc, llvm::AtomicOrdering AO) {
2757 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2758 OMPBuilder.createFlush(CGF.Builder);
2759 } else {
2760 if (!CGF.HaveInsertPoint())
2761 return;
2762 // Build call void __kmpc_flush(ident_t *loc)
2763 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2764 CGM.getModule(), OMPRTL___kmpc_flush),
2765 emitUpdateLocation(CGF, Loc));
2766 }
2767}
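// E.g. (editor's note) a standalone '#pragma omp flush' reduces to a single
// runtime call:
// \code
//   __kmpc_flush(&loc);
// \endcode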
2768
2769namespace {
2770/// Indexes of fields for type kmp_task_t.
2771enum KmpTaskTFields {
2772 /// List of shared variables.
2773 KmpTaskTShareds,
2774 /// Task routine.
2775 KmpTaskTRoutine,
2776 /// Partition id for the untied tasks.
2777 KmpTaskTPartId,
2778 /// Function with call of destructors for private variables.
2779 Data1,
2780 /// Task priority.
2781 Data2,
2782 /// (Taskloops only) Lower bound.
2783 KmpTaskTLowerBound,
2784 /// (Taskloops only) Upper bound.
2785 KmpTaskTUpperBound,
2786 /// (Taskloops only) Stride.
2787 KmpTaskTStride,
2788 /// (Taskloops only) Is last iteration flag.
2789 KmpTaskTLastIter,
2790 /// (Taskloops only) Reduction data.
2791 KmpTaskTReductions,
2792};
2793} // anonymous namespace
2794
2795void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2796 // If we are in simd mode or there are no entries, we don't need to do
2797 // anything.
2798 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2799 return;
2800
2801 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2802 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2803 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2804 SourceLocation Loc;
2805 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2806 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2807 E = CGM.getContext().getSourceManager().fileinfo_end();
2808 I != E; ++I) {
2809 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2810 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2811 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2812 I->getFirst(), EntryInfo.Line, 1);
2813 break;
2814 }
2815 }
2816 }
2817 switch (Kind) {
2818 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2819 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2820 DiagnosticsEngine::Error, "Offloading entry for target region in "
2821 "%0 is incorrect: either the "
2822 "address or the ID is invalid.");
2823 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2824 } break;
2825 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2826 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2827 DiagnosticsEngine::Error, "Offloading entry for declare target "
2828 "variable %0 is incorrect: the "
2829 "address is invalid.");
2830 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2831 } break;
2832 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2833 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2834 DiagnosticsEngine::Error,
2835 "Offloading entry for declare target variable is incorrect: the "
2836 "address is invalid.");
2837 CGM.getDiags().Report(DiagID);
2838 } break;
2839 }
2840 };
2841
2842 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2843}
2844
2845void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2846 if (!KmpRoutineEntryPtrTy) {
2847 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2848 ASTContext &C = CGM.getContext();
2849 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2850 FunctionProtoType::ExtProtoInfo EPI;
2851 KmpRoutineEntryPtrQTy = C.getPointerType(
2852 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2853 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2854 }
2855}
2856
2857namespace {
2858struct PrivateHelpersTy {
2859 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2860 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2861 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2862 PrivateElemInit(PrivateElemInit) {}
2863 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2864 const Expr *OriginalRef = nullptr;
2865 const VarDecl *Original = nullptr;
2866 const VarDecl *PrivateCopy = nullptr;
2867 const VarDecl *PrivateElemInit = nullptr;
2868 bool isLocalPrivate() const {
2869 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2870 }
2871};
2872typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2873} // anonymous namespace
2874
2875static bool isAllocatableDecl(const VarDecl *VD) {
2876 const VarDecl *CVD = VD->getCanonicalDecl();
2877 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2878 return false;
2879 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2880 // Use the default allocation.
2881 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2882 !AA->getAllocator());
2883}
2884
2885static RecordDecl *
2886createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2887 if (!Privates.empty()) {
2888 ASTContext &C = CGM.getContext();
2889 // Build struct .kmp_privates_t. {
2890 // /* private vars */
2891 // };
2892 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2893 RD->startDefinition();
2894 for (const auto &Pair : Privates) {
2895 const VarDecl *VD = Pair.second.Original;
2896 QualType Type = VD->getType().getNonReferenceType();
2897 // If the private variable is a local variable with lvalue ref type,
2898 // allocate the pointer instead of the pointee type.
2899 if (Pair.second.isLocalPrivate()) {
2900 if (VD->getType()->isLValueReferenceType())
2901 Type = C.getPointerType(Type);
2902 if (isAllocatableDecl(VD))
2903 Type = C.getPointerType(Type);
2904 }
2905 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2906 if (VD->hasAttrs()) {
2907 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2908 E(VD->getAttrs().end());
2909 I != E; ++I)
2910 FD->addAttr(*I);
2911 }
2912 }
2913 RD->completeDefinition();
2914 return RD;
2915 }
2916 return nullptr;
2917}
2918
2919static RecordDecl *
2920createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2921 QualType KmpInt32Ty,
2922 QualType KmpRoutineEntryPointerQTy) {
2923 ASTContext &C = CGM.getContext();
2924 // Build struct kmp_task_t {
2925 // void * shareds;
2926 // kmp_routine_entry_t routine;
2927 // kmp_int32 part_id;
2928 // kmp_cmplrdata_t data1;
2929 // kmp_cmplrdata_t data2;
2930 // For taskloops additional fields:
2931 // kmp_uint64 lb;
2932 // kmp_uint64 ub;
2933 // kmp_int64 st;
2934 // kmp_int32 liter;
2935 // void * reductions;
2936 // };
2937 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2938 UD->startDefinition();
2939 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2940 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2941 UD->completeDefinition();
2942 QualType KmpCmplrdataTy = C.getRecordType(UD);
2943 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2944 RD->startDefinition();
2945 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2946 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2947 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2948 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2949 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2950 if (isOpenMPTaskLoopDirective(Kind)) {
2951 QualType KmpUInt64Ty =
2952 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2953 QualType KmpInt64Ty =
2954 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2955 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2956 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2957 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2958 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2959 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2960 }
2961 RD->completeDefinition();
2962 return RD;
2963}
2964
2965static RecordDecl *
2966createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2967 ArrayRef<PrivateDataTy> Privates) {
2968 ASTContext &C = CGM.getContext();
2969 // Build struct kmp_task_t_with_privates {
2970 // kmp_task_t task_data;
2971 // .kmp_privates_t. privates;
2972 // };
2973 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2974 RD->startDefinition();
2975 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2976 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2977 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2978 RD->completeDefinition();
2979 return RD;
2980}
2981
2982/// Emit a proxy function which accepts kmp_task_t as the second
2983/// argument.
2984/// \code
2985/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2986/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2987/// For taskloops:
2988/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2989/// tt->reductions, tt->shareds);
2990/// return 0;
2991/// }
2992/// \endcode
2993static llvm::Function *
2994emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2995 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2996 QualType KmpTaskTWithPrivatesPtrQTy,
2997 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2998 QualType SharedsPtrTy, llvm::Function *TaskFunction,
2999 llvm::Value *TaskPrivatesMap) {
3000 ASTContext &C = CGM.getContext();
3001 FunctionArgList Args;
3002 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3003 ImplicitParamKind::Other);
3004 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3005 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3006 ImplicitParamKind::Other);
3007 Args.push_back(&GtidArg);
3008 Args.push_back(&TaskTypeArg);
3009 const auto &TaskEntryFnInfo =
3010 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3011 llvm::FunctionType *TaskEntryTy =
3012 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3013 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3014 auto *TaskEntry = llvm::Function::Create(
3015 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3016 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3017 TaskEntry->setDoesNotRecurse();
3018 CodeGenFunction CGF(CGM);
3019 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3020 Loc, Loc);
3021
3022 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3023 // tt,
3024 // For taskloops:
3025 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3026 // tt->task_data.shareds);
3027 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3028 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3029 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3030 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3031 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3032 const auto *KmpTaskTWithPrivatesQTyRD =
3033 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3034 LValue Base =
3035 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3036 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3037 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3038 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3039 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3040
3041 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3042 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3043 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3044 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3045 CGF.ConvertTypeForMem(SharedsPtrTy));
3046
3047 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3048 llvm::Value *PrivatesParam;
3049 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3050 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3051 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3052 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3053 } else {
3054 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3055 }
3056
3057 llvm::Value *CommonArgs[] = {
3058 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3059 CGF.Builder
3060 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3061 CGF.VoidPtrTy, CGF.Int8Ty)
3062 .emitRawPointer(CGF)};
3063 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3064 std::end(CommonArgs));
3065 if (isOpenMPTaskLoopDirective(Kind)) {
3066 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3067 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3068 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3069 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3070 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3071 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3072 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3073 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3074 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3075 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3076 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3077 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3078 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3079 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3080 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3081 CallArgs.push_back(LBParam);
3082 CallArgs.push_back(UBParam);
3083 CallArgs.push_back(StParam);
3084 CallArgs.push_back(LIParam);
3085 CallArgs.push_back(RParam);
3086 }
3087 CallArgs.push_back(SharedsParam);
3088
3089 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3090 CallArgs);
3091 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3092 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3093 CGF.FinishFunction();
3094 return TaskEntry;
3095}
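// Roughly, the proxy built above behaves like the following sketch (the
// kmp_task_t_with_privates name stands for the kmp_task_t record extended
// with this task's privates block):
//   kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
//     TaskFunction(gtid, tt->task_data.part_id, &tt->privates,
//                  .omp_task_privates_map., tt,
//                  /*taskloop only: lb, ub, st, liter, reductions,*/
//                  tt->task_data.shareds);
//     return 0;
//   }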
3096
3097static llvm::Value *
3098emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc,
3099 QualType KmpInt32Ty,
3100 QualType KmpTaskTWithPrivatesPtrQTy,
3101 QualType KmpTaskTWithPrivatesQTy) {
3102 ASTContext &C = CGM.getContext();
3103 FunctionArgList Args;
3104 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3105 ImplicitParamKind::Other);
3106 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3107 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3108 ImplicitParamKind::Other);
3109 Args.push_back(&GtidArg);
3110 Args.push_back(&TaskTypeArg);
3111 const auto &DestructorFnInfo =
3112 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3113 llvm::FunctionType *DestructorFnTy =
3114 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3115 std::string Name =
3116 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3117 auto *DestructorFn =
3118 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3119 Name, &CGM.getModule());
3120 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3121 DestructorFnInfo);
3122 DestructorFn->setDoesNotRecurse();
3123 CodeGenFunction CGF(CGM);
3124 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3125 Args, Loc, Loc);
3126
3127 LValue Base = CGF.EmitLoadOfPointerLValue(
3128 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3129 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3130 const auto *KmpTaskTWithPrivatesQTyRD =
3131 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3132 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3133 Base = CGF.EmitLValueForField(Base, *FI);
3134 for (const auto *Field :
3135 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3136 if (QualType::DestructionKind DtorKind =
3137 Field->getType().isDestructedType()) {
3138 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3139 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3140 }
3141 }
3142 CGF.FinishFunction();
3143 return DestructorFn;
3144}
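// Roughly, the destructor helper built above behaves like:
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                   kmp_task_t_with_privates *tt) {
//     for each field F of tt->privates with a non-trivial destructor:
//       F.~T();
//   }
// It is registered with the runtime only when checkDestructorsRequired()
// reports that at least one private copy needs cleanup.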
3145
3146/// Emit a privates mapping function for correct handling of private and
3147/// firstprivate variables.
3148/// \code
3149/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3150/// **noalias priv1,..., <tyn> **noalias privn) {
3151/// *priv1 = &.privates.priv1;
3152/// ...;
3153/// *privn = &.privates.privn;
3154/// }
3155/// \endcode
3156static llvm::Value *
3157emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3158 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3159 ArrayRef<PrivateDataTy> Privates) {
3160 ASTContext &C = CGM.getContext();
3161 FunctionArgList Args;
3162 ImplicitParamDecl TaskPrivatesArg(
3163 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3164 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3165 ImplicitParamKind::Other);
3166 Args.push_back(&TaskPrivatesArg);
3167 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3168 unsigned Counter = 1;
3169 for (const Expr *E : Data.PrivateVars) {
3170 Args.push_back(ImplicitParamDecl::Create(
3171 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3172 C.getPointerType(C.getPointerType(E->getType()))
3173 .withConst()
3174 .withRestrict(),
3175 ImplicitParamKind::Other));
3176 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3177 PrivateVarsPos[VD] = Counter;
3178 ++Counter;
3179 }
3180 for (const Expr *E : Data.FirstprivateVars) {
3181 Args.push_back(ImplicitParamDecl::Create(
3182 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3183 C.getPointerType(C.getPointerType(E->getType()))
3184 .withConst()
3185 .withRestrict(),
3186 ImplicitParamKind::Other));
3187 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3188 PrivateVarsPos[VD] = Counter;
3189 ++Counter;
3190 }
3191 for (const Expr *E : Data.LastprivateVars) {
3192 Args.push_back(ImplicitParamDecl::Create(
3193 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3194 C.getPointerType(C.getPointerType(E->getType()))
3195 .withConst()
3196 .withRestrict(),
3197 ImplicitParamKind::Other));
3198 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3199 PrivateVarsPos[VD] = Counter;
3200 ++Counter;
3201 }
3202 for (const VarDecl *VD : Data.PrivateLocals) {
3203 QualType Ty = VD->getType().getNonReferenceType();
3204 if (VD->getType()->isLValueReferenceType())
3205 Ty = C.getPointerType(Ty);
3206 if (isAllocatableDecl(VD))
3207 Ty = C.getPointerType(Ty);
3208 Args.push_back(ImplicitParamDecl::Create(
3209 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3210 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3211 ImplicitParamKind::Other));
3212 PrivateVarsPos[VD] = Counter;
3213 ++Counter;
3214 }
3215 const auto &TaskPrivatesMapFnInfo =
3216 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3217 llvm::FunctionType *TaskPrivatesMapTy =
3218 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3219 std::string Name =
3220 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3221 auto *TaskPrivatesMap = llvm::Function::Create(
3222 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3223 &CGM.getModule());
3224 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3225 TaskPrivatesMapFnInfo);
3226 if (CGM.getLangOpts().Optimize) {
3227 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3228 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3229 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3230 }
3231 CodeGenFunction CGF(CGM);
3232 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3233 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3234
3235 // *privi = &.privates.privi;
3236 LValue Base = CGF.EmitLoadOfPointerLValue(
3237 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3238 TaskPrivatesArg.getType()->castAs<PointerType>());
3239 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3240 Counter = 0;
3241 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3242 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3243 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3244 LValue RefLVal =
3245 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3246 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3247 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3248 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3249 ++Counter;
3250 }
3251 CGF.FinishFunction();
3252 return TaskPrivatesMap;
3253}
3254
3255/// Emit initialization for private variables in task-based directives.
3256static void emitPrivatesInit(CodeGenFunction &CGF,
3257 const OMPExecutableDirective &D,
3258 Address KmpTaskSharedsPtr, LValue TDBase,
3259 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3260 QualType SharedsTy, QualType SharedsPtrTy,
3261 const OMPTaskDataTy &Data,
3262 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3263 ASTContext &C = CGF.getContext();
3264 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3265 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3266 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3267 ? OMPD_taskloop
3268 : OMPD_task;
3269 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3270 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3271 LValue SrcBase;
3272 bool IsTargetTask =
3273 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3274 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3275 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3276 // PointersArray, SizesArray, and MappersArray. The original variables for
3277 // these arrays are not captured and we get their addresses explicitly.
3278 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3279 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3280 SrcBase = CGF.MakeAddrLValue(
3281 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3282 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3283 CGF.ConvertTypeForMem(SharedsTy)),
3284 SharedsTy);
3285 }
3286 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3287 for (const PrivateDataTy &Pair : Privates) {
3288 // Do not initialize private locals.
3289 if (Pair.second.isLocalPrivate()) {
3290 ++FI;
3291 continue;
3292 }
3293 const VarDecl *VD = Pair.second.PrivateCopy;
3294 const Expr *Init = VD->getAnyInitializer();
3295 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3296 !CGF.isTrivialInitializer(Init)))) {
3297 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3298 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3299 const VarDecl *OriginalVD = Pair.second.Original;
3300 // Check if the variable is the target-based BasePointersArray,
3301 // PointersArray, SizesArray, or MappersArray.
3302 LValue SharedRefLValue;
3303 QualType Type = PrivateLValue.getType();
3304 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3305 if (IsTargetTask && !SharedField) {
3306 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3307 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3308 cast<CapturedDecl>(OriginalVD->getDeclContext())
3309 ->getNumParams() == 0 &&
3310 isa<TranslationUnitDecl>(
3311 cast<CapturedDecl>(OriginalVD->getDeclContext())
3312 ->getDeclContext()) &&
3313 "Expected artificial target data variable.");
3314 SharedRefLValue =
3315 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3316 } else if (ForDup) {
3317 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3318 SharedRefLValue = CGF.MakeAddrLValue(
3319 SharedRefLValue.getAddress().withAlignment(
3320 C.getDeclAlign(OriginalVD)),
3321 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3322 SharedRefLValue.getTBAAInfo());
3323 } else if (CGF.LambdaCaptureFields.count(
3324 Pair.second.Original->getCanonicalDecl()) > 0 ||
3325 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3326 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3327 } else {
3328 // Processing for implicitly captured variables.
3329 InlinedOpenMPRegionRAII Region(
3330 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3331 /*HasCancel=*/false, /*NoInheritance=*/true);
3332 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3333 }
3334 if (Type->isArrayType()) {
3335 // Initialize firstprivate array.
3336 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3337 // Perform simple memcpy.
3338 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3339 } else {
3340 // Initialize firstprivate array using element-by-element
3341 // initialization.
3342 CGF.EmitOMPAggregateAssign(
3343 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3344 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3345 Address SrcElement) {
3346 // Clean up any temporaries needed by the initialization.
3347 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3348 InitScope.addPrivate(Elem, SrcElement);
3349 (void)InitScope.Privatize();
3350 // Emit initialization for single element.
3351 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3352 CGF, &CapturesInfo);
3353 CGF.EmitAnyExprToMem(Init, DestElement,
3354 Init->getType().getQualifiers(),
3355 /*IsInitializer=*/false);
3356 });
3357 }
3358 } else {
3359 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3360 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3361 (void)InitScope.Privatize();
3362 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3363 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3364 /*capturedByInit=*/false);
3365 }
3366 } else {
3367 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3368 }
3369 }
3370 ++FI;
3371 }
3372}
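// In outline: emitPrivatesInit walks the privates record in lockstep with the
// sorted Privates list; for every non-local private with an initializer it
// either copies the shared original wholesale (trivially initialized
// firstprivate arrays), runs an element-by-element initialization loop
// (arrays with non-trivial constructors), or emits the initializer expression
// straight into the private field.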
3373
3374/// Check if duplication function is required for taskloops.
3375static bool checkInitIsRequired(CodeGenFunction &CGF,
3376 ArrayRef<PrivateDataTy> Privates) {
3377 bool InitRequired = false;
3378 for (const PrivateDataTy &Pair : Privates) {
3379 if (Pair.second.isLocalPrivate())
3380 continue;
3381 const VarDecl *VD = Pair.second.PrivateCopy;
3382 const Expr *Init = VD->getAnyInitializer();
3383 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3384 !CGF.isTrivialInitializer(Init));
3385 if (InitRequired)
3386 break;
3387 }
3388 return InitRequired;
3389}
3390
3391
3392/// Emit task_dup function (for initialization of
3393/// private/firstprivate/lastprivate vars and last_iter flag)
3394/// \code
3395/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3396/// lastpriv) {
3397/// // setup lastprivate flag
3398/// task_dst->last = lastpriv;
3399/// // could be constructor calls here...
3400/// }
3401/// \endcode
3402static llvm::Value *
3403emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3404 const OMPExecutableDirective &D,
3405 QualType KmpTaskTWithPrivatesPtrQTy,
3406 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3407 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3408 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3409 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3410 ASTContext &C = CGM.getContext();
3411 FunctionArgList Args;
3412 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3413 KmpTaskTWithPrivatesPtrQTy,
3414 ImplicitParamKind::Other);
3415 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3416 KmpTaskTWithPrivatesPtrQTy,
3417 ImplicitParamKind::Other);
3418 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3419 ImplicitParamKind::Other);
3420 Args.push_back(&DstArg);
3421 Args.push_back(&SrcArg);
3422 Args.push_back(&LastprivArg);
3423 const auto &TaskDupFnInfo =
3424 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3425 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3426 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3427 auto *TaskDup = llvm::Function::Create(
3428 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3429 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3430 TaskDup->setDoesNotRecurse();
3431 CodeGenFunction CGF(CGM);
3432 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3433 Loc);
3434
3435 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3436 CGF.GetAddrOfLocalVar(&DstArg),
3437 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3438 // task_dst->liter = lastpriv;
3439 if (WithLastIter) {
3440 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3441 LValue Base = CGF.EmitLValueForField(
3442 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3443 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3444 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3445 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3446 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3447 }
3448
3449 // Emit initial values for private copies (if any).
3450 assert(!Privates.empty());
3451 Address KmpTaskSharedsPtr = Address::invalid();
3452 if (!Data.FirstprivateVars.empty()) {
3453 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3454 CGF.GetAddrOfLocalVar(&SrcArg),
3455 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3456 LValue Base = CGF.EmitLValueForField(
3457 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3458 KmpTaskSharedsPtr = Address(
3459 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3460 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3461 KmpTaskTShareds)),
3462 Loc),
3463 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3464 }
3465 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3466 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3467 CGF.FinishFunction();
3468 return TaskDup;
3469}
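// As the doc comment's sketch suggests, the generated task_dup helper first
// propagates the lastprivate flag into task_dst->liter (when WithLastIter is
// set) and then re-runs emitPrivatesInit with ForDup=true so firstprivate
// copies in the destination task are constructed from the source task's
// shareds.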
3470
3471/// Checks if destructor function is required to be generated.
3472/// \return true if cleanups are required, false otherwise.
3473static bool
3474checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3475 ArrayRef<PrivateDataTy> Privates) {
3476 for (const PrivateDataTy &P : Privates) {
3477 if (P.second.isLocalPrivate())
3478 continue;
3479 QualType Ty = P.second.Original->getType().getNonReferenceType();
3480 if (Ty.isDestructedType())
3481 return true;
3482 }
3483 return false;
3484}
3485
3486namespace {
3487/// Loop generator for OpenMP iterator expression.
3488class OMPIteratorGeneratorScope final
3489 : public CodeGenFunction::OMPPrivateScope {
3490 CodeGenFunction &CGF;
3491 const OMPIteratorExpr *E = nullptr;
3492 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3493 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3494 OMPIteratorGeneratorScope() = delete;
3495 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3496
3497public:
3498 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3499 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3500 if (!E)
3501 return;
3502 SmallVector<llvm::Value *, 4> Uppers;
3503 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3504 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3505 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3506 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3507 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3508 addPrivate(
3509 HelperData.CounterVD,
3510 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3511 }
3512 Privatize();
3513
3514 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3515 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3516 LValue CLVal =
3517 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3518 HelperData.CounterVD->getType());
3519 // Counter = 0;
3520 CGF.EmitStoreOfScalar(
3521 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3522 CLVal);
3523 CodeGenFunction::JumpDest &ContDest =
3524 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3525 CodeGenFunction::JumpDest &ExitDest =
3526 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3527 // N = <number-of_iterations>;
3528 llvm::Value *N = Uppers[I];
3529 // cont:
3530 // if (Counter < N) goto body; else goto exit;
3531 CGF.EmitBlock(ContDest.getBlock());
3532 auto *CVal =
3533 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3534 llvm::Value *Cmp =
3535 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3536 ? CGF.Builder.CreateICmpSLT(CVal, N)
3537 : CGF.Builder.CreateICmpULT(CVal, N);
3538 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3539 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3540 // body:
3541 CGF.EmitBlock(BodyBB);
3542 // Iteri = Begini + Counter * Stepi;
3543 CGF.EmitIgnoredExpr(HelperData.Update);
3544 }
3545 }
3546 ~OMPIteratorGeneratorScope() {
3547 if (!E)
3548 return;
3549 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3550 // Counter = Counter + 1;
3551 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3552 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3553 // goto cont;
3554 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3555 // exit:
3556 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3557 }
3558 }
3559};
3560} // namespace
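// Per iterator, the scope above emits roughly:
//   counter = 0;
// cont:
//   if (!(counter < upper)) goto exit;
//   iter_i = begin_i + counter * step_i;  // HelperData.Update
//   ... code emitted while the scope is alive ...
//   counter = counter + 1;                // HelperData.CounterUpdate
//   goto cont;
// exit:
// with nested iterators expanding to nested loops in declaration order.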
3561
3562static std::pair<llvm::Value *, llvm::Value *>
3563getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3564 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3565 llvm::Value *Addr;
3566 if (OASE) {
3567 const Expr *Base = OASE->getBase();
3568 Addr = CGF.EmitScalarExpr(Base);
3569 } else {
3570 Addr = CGF.EmitLValue(E).getPointer(CGF);
3571 }
3572 llvm::Value *SizeVal;
3573 QualType Ty = E->getType();
3574 if (OASE) {
3575 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3576 for (const Expr *SE : OASE->getDimensions()) {
3577 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3578 Sz = CGF.EmitScalarConversion(
3579 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3580 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3581 }
3582 } else if (const auto *ASE =
3583 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3584 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3585 Address UpAddrAddress = UpAddrLVal.getAddress();
3586 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3587 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3588 /*Idx0=*/1);
3589 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3590 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3591 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3592 } else {
3593 SizeVal = CGF.getTypeSize(Ty);
3594 }
3595 return std::make_pair(Addr, SizeVal);
3596}
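// getPointerAndSize thus covers three shapes of expression: array-shaping
// expressions (size = element size times the product of the dimensions),
// array sections (size = one-past-the-upper-bound address minus the base
// address), and plain lvalues (size = sizeof the expression's type).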
3597
3598/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3599static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3600 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3601 if (KmpTaskAffinityInfoTy.isNull()) {
3602 RecordDecl *KmpAffinityInfoRD =
3603 C.buildImplicitRecord("kmp_task_affinity_info_t");
3604 KmpAffinityInfoRD->startDefinition();
3605 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3606 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3607 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3608 KmpAffinityInfoRD->completeDefinition();
3609 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3610 }
3611}
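// The implicit record built here mirrors the runtime-side descriptor,
// roughly:
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t len;
//     uint32_t flags; // FlagsTy above: unsigned, 32 bits
//   };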
3612
3613CGOpenMPRuntime::TaskResultTy
3614CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3615 const OMPExecutableDirective &D,
3616 llvm::Function *TaskFunction, QualType SharedsTy,
3617 Address Shareds, const OMPTaskDataTy &Data) {
3618 ASTContext &C = CGM.getContext();
3619 llvm::SmallVector<PrivateDataTy, 4> Privates;
3620 // Aggregate privates and sort them by the alignment.
3621 const auto *I = Data.PrivateCopies.begin();
3622 for (const Expr *E : Data.PrivateVars) {
3623 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3624 Privates.emplace_back(
3625 C.getDeclAlign(VD),
3626 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3627 /*PrivateElemInit=*/nullptr));
3628 ++I;
3629 }
3630 I = Data.FirstprivateCopies.begin();
3631 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3632 for (const Expr *E : Data.FirstprivateVars) {
3633 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3634 Privates.emplace_back(
3635 C.getDeclAlign(VD),
3636 PrivateHelpersTy(
3637 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3638 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3639 ++I;
3640 ++IElemInitRef;
3641 }
3642 I = Data.LastprivateCopies.begin();
3643 for (const Expr *E : Data.LastprivateVars) {
3644 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3645 Privates.emplace_back(
3646 C.getDeclAlign(VD),
3647 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3648 /*PrivateElemInit=*/nullptr));
3649 ++I;
3650 }
3651 for (const VarDecl *VD : Data.PrivateLocals) {
3652 if (isAllocatableDecl(VD))
3653 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3654 else
3655 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3656 }
3657 llvm::stable_sort(Privates,
3658 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3659 return L.first > R.first;
3660 });
3661 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3662 // Build type kmp_routine_entry_t (if not built yet).
3663 emitKmpRoutineEntryT(KmpInt32Ty);
3664 // Build type kmp_task_t (if not built yet).
3665 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3666 if (SavedKmpTaskloopTQTy.isNull()) {
3667 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3668 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3669 }
3670 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3671 } else {
3672 assert((D.getDirectiveKind() == OMPD_task ||
3673 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3674 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3675 "Expected taskloop, task or target directive");
3676 if (SavedKmpTaskTQTy.isNull()) {
3677 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3678 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3679 }
3680 KmpTaskTQTy = SavedKmpTaskTQTy;
3681 }
3682 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3683 // Build particular struct kmp_task_t for the given task.
3684 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3685 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3686 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3687 QualType KmpTaskTWithPrivatesPtrQTy =
3688 C.getPointerType(KmpTaskTWithPrivatesQTy);
3689 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3690 llvm::Type *KmpTaskTWithPrivatesPtrTy =
3691 KmpTaskTWithPrivatesTy->getPointerTo();
3692 llvm::Value *KmpTaskTWithPrivatesTySize =
3693 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3694 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3695
3696 // Emit initial values for private copies (if any).
3697 llvm::Value *TaskPrivatesMap = nullptr;
3698 llvm::Type *TaskPrivatesMapTy =
3699 std::next(TaskFunction->arg_begin(), 3)->getType();
3700 if (!Privates.empty()) {
3701 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3702 TaskPrivatesMap =
3703 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3704 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3705 TaskPrivatesMap, TaskPrivatesMapTy);
3706 } else {
3707 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3708 cast<llvm::PointerType>(TaskPrivatesMapTy));
3709 }
3710 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3711 // kmp_task_t *tt);
3712 llvm::Function *TaskEntry = emitProxyTaskFunction(
3713 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3714 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3715 TaskPrivatesMap);
3716
3717 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3718 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3719 // kmp_routine_entry_t *task_entry);
3720 // Task flags. Format is taken from
3721 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3722 // description of kmp_tasking_flags struct.
3723 enum {
3724 TiedFlag = 0x1,
3725 FinalFlag = 0x2,
3726 DestructorsFlag = 0x8,
3727 PriorityFlag = 0x20,
3728 DetachableFlag = 0x40,
3729 };
3730 unsigned Flags = Data.Tied ? TiedFlag : 0;
3731 bool NeedsCleanup = false;
3732 if (!Privates.empty()) {
3733 NeedsCleanup =
3734 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3735 if (NeedsCleanup)
3736 Flags = Flags | DestructorsFlag;
3737 }
3738 if (Data.Priority.getInt())
3739 Flags = Flags | PriorityFlag;
3740 if (D.hasClausesOfKind<OMPDetachClause>())
3741 Flags = Flags | DetachableFlag;
3742 llvm::Value *TaskFlags =
3743 Data.Final.getPointer()
3744 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3745 CGF.Builder.getInt32(FinalFlag),
3746 CGF.Builder.getInt32(/*C=*/0))
3747 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3748 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3749 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3750 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3751 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3752 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3753 TaskEntry, KmpRoutineEntryPtrTy)};
3754 llvm::Value *NewTask;
3755 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3756 // Check if we have any device clause associated with the directive.
3757 const Expr *Device = nullptr;
3758 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3759 Device = C->getDevice();
3760 // Emit device ID if any otherwise use default value.
3761 llvm::Value *DeviceID;
3762 if (Device)
3763 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3764 CGF.Int64Ty, /*isSigned=*/true);
3765 else
3766 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3767 AllocArgs.push_back(DeviceID);
3768 NewTask = CGF.EmitRuntimeCall(
3769 OMPBuilder.getOrCreateRuntimeFunction(
3770 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3771 AllocArgs);
3772 } else {
3773 NewTask =
3774 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3775 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3776 AllocArgs);
3777 }
3778 // Emit detach clause initialization.
3779 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3780 // task_descriptor);
3781 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3782 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3783 LValue EvtLVal = CGF.EmitLValue(Evt);
3784
3785 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3786 // int gtid, kmp_task_t *task);
3787 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3788 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3789 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3790 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3791 OMPBuilder.getOrCreateRuntimeFunction(
3792 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3793 {Loc, Tid, NewTask});
3794 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3795 Evt->getExprLoc());
3796 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3797 }
3798 // Process affinity clauses.
3799 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3800 // Process list of affinity data.
3801 ASTContext &C = CGM.getContext();
3802 Address AffinitiesArray = Address::invalid();
3803 // Calculate number of elements to form the array of affinity data.
3804 llvm::Value *NumOfElements = nullptr;
3805 unsigned NumAffinities = 0;
3806 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3807 if (const Expr *Modifier = C->getModifier()) {
3808 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3809 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3810 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3811 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3812 NumOfElements =
3813 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3814 }
3815 } else {
3816 NumAffinities += C->varlist_size();
3817 }
3818 }
3819 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3820 // Field ids in kmp_task_affinity_info record.
3821 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3822
3823 QualType KmpTaskAffinityInfoArrayTy;
3824 if (NumOfElements) {
3825 NumOfElements = CGF.Builder.CreateNUWAdd(
3826 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3827 auto *OVE = new (C) OpaqueValueExpr(
3828 Loc,
3829 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3830 VK_PRValue);
3831 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3832 RValue::get(NumOfElements));
3833 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3834 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3835 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3836 // Properly emit variable-sized array.
3837 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3838 ImplicitParamKind::Other);
3839 CGF.EmitVarDecl(*PD);
3840 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3841 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3842 /*isSigned=*/false);
3843 } else {
3844 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3845 KmpTaskAffinityInfoTy,
3846 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3847 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3848 AffinitiesArray =
3849 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3850 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3851 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3852 /*isSigned=*/false);
3853 }
3854
3855 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3856 // Fill array by elements without iterators.
3857 unsigned Pos = 0;
3858 bool HasIterator = false;
3859 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3860 if (C->getModifier()) {
3861 HasIterator = true;
3862 continue;
3863 }
3864 for (const Expr *E : C->varlist()) {
3865 llvm::Value *Addr;
3866 llvm::Value *Size;
3867 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3868 LValue Base =
3869 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3870 KmpTaskAffinityInfoTy);
3871 // affs[i].base_addr = &<Affinities[i].second>;
3872 LValue BaseAddrLVal = CGF.EmitLValueForField(
3873 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3874 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3875 BaseAddrLVal);
3876 // affs[i].len = sizeof(<Affinities[i].second>);
3877 LValue LenLVal = CGF.EmitLValueForField(
3878 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3879 CGF.EmitStoreOfScalar(Size, LenLVal);
3880 ++Pos;
3881 }
3882 }
3883 LValue PosLVal;
3884 if (HasIterator) {
3885 PosLVal = CGF.MakeAddrLValue(
3886 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3887 C.getSizeType());
3888 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3889 }
3890 // Process elements with iterators.
3891 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3892 const Expr *Modifier = C->getModifier();
3893 if (!Modifier)
3894 continue;
3895 OMPIteratorGeneratorScope IteratorScope(
3896 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3897 for (const Expr *E : C->varlist()) {
3898 llvm::Value *Addr;
3899 llvm::Value *Size;
3900 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3901 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3902 LValue Base =
3903 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3904 KmpTaskAffinityInfoTy);
3905 // affs[i].base_addr = &<Affinities[i].second>;
3906 LValue BaseAddrLVal = CGF.EmitLValueForField(
3907 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3908 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3909 BaseAddrLVal);
3910 // affs[i].len = sizeof(<Affinities[i].second>);
3911 LValue LenLVal = CGF.EmitLValueForField(
3912 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3913 CGF.EmitStoreOfScalar(Size, LenLVal);
3914 Idx = CGF.Builder.CreateNUWAdd(
3915 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3916 CGF.EmitStoreOfScalar(Idx, PosLVal);
3917 }
3918 }
3919 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3920 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3921 // naffins, kmp_task_affinity_info_t *affin_list);
3922 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3923 llvm::Value *GTid = getThreadID(CGF, Loc);
3924 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3925 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3926 // FIXME: Emit the function and ignore its result for now unless the
3927 // runtime function is properly implemented.
3928 (void)CGF.EmitRuntimeCall(
3929 OMPBuilder.getOrCreateRuntimeFunction(
3930 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3931 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3932 }
3933 llvm::Value *NewTaskNewTaskTTy =
3934 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3935 NewTask, KmpTaskTWithPrivatesPtrTy);
3936 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3937 KmpTaskTWithPrivatesQTy);
3938 LValue TDBase =
3939 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3940 // Fill the data in the resulting kmp_task_t record.
3941 // Copy shareds if there are any.
3942 Address KmpTaskSharedsPtr = Address::invalid();
3943 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3944 KmpTaskSharedsPtr = Address(
3945 CGF.EmitLoadOfScalar(
3946 CGF.EmitLValueForField(
3947 TDBase,
3948 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3949 Loc),
3950 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3951 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3952 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3953 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3954 }
3955 // Emit initial values for private copies (if any).
3956 TaskResultTy Result;
3957 if (!Privates.empty()) {
3958 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3959 SharedsTy, SharedsPtrTy, Data, Privates,
3960 /*ForDup=*/false);
3961 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3962 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3963 Result.TaskDupFn = emitTaskDupFunction(
3964 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3965 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3966 /*WithLastIter=*/!Data.LastprivateVars.empty());
3967 }
3968 }
3969 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3970 enum { Priority = 0, Destructors = 1 };
3971 // Provide pointer to function with destructors for privates.
3972 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3973 const RecordDecl *KmpCmplrdataUD =
3974 (*FI)->getType()->getAsUnionType()->getDecl();
3975 if (NeedsCleanup) {
3976 llvm::Value *DestructorFn = emitDestructorsFunction(
3977 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3978 KmpTaskTWithPrivatesQTy);
3979 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3980 LValue DestructorsLV = CGF.EmitLValueForField(
3981 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3982 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3983 DestructorFn, KmpRoutineEntryPtrTy),
3984 DestructorsLV);
3985 }
3986 // Set priority.
3987 if (Data.Priority.getInt()) {
3988 LValue Data2LV = CGF.EmitLValueForField(
3989 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3990 LValue PriorityLV = CGF.EmitLValueForField(
3991 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3992 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3993 }
3994 Result.NewTask = NewTask;
3995 Result.TaskEntry = TaskEntry;
3996 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3997 Result.TDBase = TDBase;
3998 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3999 return Result;
4000}
4001
4002/// Translates internal dependency kind into the runtime kind.
4003static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4004 RTLDependenceKindTy DepKind;
4005 switch (K) {
4006 case OMPC_DEPEND_in:
4007 DepKind = RTLDependenceKindTy::DepIn;
4008 break;
4009 // Out and InOut dependencies must use the same code.
4010 case OMPC_DEPEND_out:
4011 case OMPC_DEPEND_inout:
4012 DepKind = RTLDependenceKindTy::DepInOut;
4013 break;
4014 case OMPC_DEPEND_mutexinoutset:
4015 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4016 break;
4017 case OMPC_DEPEND_inoutset:
4018 DepKind = RTLDependenceKindTy::DepInOutSet;
4019 break;
4020 case OMPC_DEPEND_outallmemory:
4021 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4022 break;
4023 case OMPC_DEPEND_source:
4024 case OMPC_DEPEND_sink:
4025 case OMPC_DEPEND_depobj:
4026 case OMPC_DEPEND_inoutallmemory:
4027 case OMPC_DEPEND_unknown:
4028 llvm_unreachable("Unknown task dependence type");
4029 }
4030 return DepKind;
4031}
4032
4033/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4034static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4035 QualType &FlagsTy) {
4036 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4037 if (KmpDependInfoTy.isNull()) {
4038 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4039 KmpDependInfoRD->startDefinition();
4040 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4041 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4042 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4043 KmpDependInfoRD->completeDefinition();
4044 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4045 }
4046}
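// Likewise, this record mirrors the runtime's dependence descriptor,
// roughly:
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t len;
//     unsigned char flags; // FlagsTy above: unsigned, as wide as bool
//   };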
4047
4048std::pair<llvm::Value *, LValue>
4049CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4050 SourceLocation Loc) {
4051 ASTContext &C = CGM.getContext();
4052 QualType FlagsTy;
4053 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4054 RecordDecl *KmpDependInfoRD =
4055 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4056 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4057 LValue Base = CGF.EmitLoadOfPointerLValue(
4058 DepobjLVal.getAddress().withElementType(
4059 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4060 KmpDependInfoPtrTy->castAs<PointerType>());
4061 Address DepObjAddr = CGF.Builder.CreateGEP(
4062 CGF, Base.getAddress(),
4063 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4064 LValue NumDepsBase = CGF.MakeAddrLValue(
4065 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4066 // NumDeps = deps[i].base_addr;
4067 LValue BaseAddrLVal = CGF.EmitLValueForField(
4068 NumDepsBase,
4069 *std::next(KmpDependInfoRD->field_begin(),
4070 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4071 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4072 return std::make_pair(NumDeps, Base);
4073}
4074
4075static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4076 llvm::PointerUnion<unsigned *, LValue *> Pos,
4077 const OMPTaskDataTy::DependData &Data,
4078 Address DependenciesArray) {
4079 CodeGenModule &CGM = CGF.CGM;
4080 ASTContext &C = CGM.getContext();
4081 QualType FlagsTy;
4082 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4083 RecordDecl *KmpDependInfoRD =
4084 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4085 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4086
4087 OMPIteratorGeneratorScope IteratorScope(
4088 CGF, cast_or_null<OMPIteratorExpr>(
4089 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4090 : nullptr));
4091 for (const Expr *E : Data.DepExprs) {
4092 llvm::Value *Addr;
4093 llvm::Value *Size;
4094
4095 // The expression will be a nullptr in the 'omp_all_memory' case.
4096 if (E) {
4097 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4098 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4099 } else {
4100 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4101 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4102 }
4103 LValue Base;
4104 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4105 Base = CGF.MakeAddrLValue(
4106 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4107 } else {
4108 assert(E && "Expected a non-null expression");
4109 LValue &PosLVal = *Pos.get<LValue *>();
4110 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4111 Base = CGF.MakeAddrLValue(
4112 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4113 }
4114 // deps[i].base_addr = &<Dependencies[i].second>;
4115 LValue BaseAddrLVal = CGF.EmitLValueForField(
4116 Base,
4117 *std::next(KmpDependInfoRD->field_begin(),
4118 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4119 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4120 // deps[i].len = sizeof(<Dependencies[i].second>);
4121 LValue LenLVal = CGF.EmitLValueForField(
4122 Base, *std::next(KmpDependInfoRD->field_begin(),
4123 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4124 CGF.EmitStoreOfScalar(Size, LenLVal);
4125 // deps[i].flags = <Dependencies[i].first>;
4126 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4127 LValue FlagsLVal = CGF.EmitLValueForField(
4128 Base,
4129 *std::next(KmpDependInfoRD->field_begin(),
4130 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4131 CGF.EmitStoreOfScalar(
4132 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4133 FlagsLVal);
4134 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4135 ++(*P);
4136 } else {
4137 LValue &PosLVal = *Pos.get<LValue *>();
4138 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4139 Idx = CGF.Builder.CreateNUWAdd(Idx,
4140 llvm::ConstantInt::get(Idx->getType(), 1));
4141 CGF.EmitStoreOfScalar(Idx, PosLVal);
4142 }
4143 }
4144}
4145
4146static SmallVector<llvm::Value *, 4>
4147emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4148 const OMPTaskDataTy::DependData &Data) {
4149 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4150 "Expected depobj dependency kind.");
4151 SmallVector<llvm::Value *, 4> Sizes;
4152 SmallVector<LValue, 4> SizeLVals;
4153 ASTContext &C = CGF.getContext();
4154 {
4155 OMPIteratorGeneratorScope IteratorScope(
4156 CGF, cast_or_null<OMPIteratorExpr>(
4157 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4158 : nullptr));
4159 for (const Expr *E : Data.DepExprs) {
4160 llvm::Value *NumDeps;
4161 LValue Base;
4162 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4163 std::tie(NumDeps, Base) =
4164 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4165 LValue NumLVal = CGF.MakeAddrLValue(
4166 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4167 C.getUIntPtrType());
4168 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4169 NumLVal.getAddress());
4170 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4171 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4172 CGF.EmitStoreOfScalar(Add, NumLVal);
4173 SizeLVals.push_back(NumLVal);
4174 }
4175 }
4176 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4177 llvm::Value *Size =
4178 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4179 Sizes.push_back(Size);
4180 }
4181 return Sizes;
4182}
4183
4184static void emitDepobjElements(CodeGenFunction &CGF,
4185 QualType &KmpDependInfoTy,
4186 LValue PosLVal,
4187 const OMPTaskDataTy::DependData &Data,
4188 Address DependenciesArray) {
4189 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4190 "Expected depobj dependency kind.");
4191 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4192 {
4193 OMPIteratorGeneratorScope IteratorScope(
4194 CGF, cast_or_null<OMPIteratorExpr>(
4195 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4196 : nullptr));
4197 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4198 const Expr *E = Data.DepExprs[I];
4199 llvm::Value *NumDeps;
4200 LValue Base;
4201 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4202 std::tie(NumDeps, Base) =
4203 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4204
4205 // memcopy dependency data.
4206 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4207 ElSize,
4208 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4209 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4210 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4211 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4212
4213 // Increase pos.
4214 // pos += size;
4215 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4216 CGF.EmitStoreOfScalar(Add, PosLVal);
4217 }
4218 }
4219}
4220
4221std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4222 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4223 SourceLocation Loc) {
4224 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4225 return D.DepExprs.empty();
4226 }))
4227 return std::make_pair(nullptr, Address::invalid());
4228 // Process list of dependencies.
4229 ASTContext &C = CGM.getContext();
4230 Address DependenciesArray = Address::invalid();
4231 llvm::Value *NumOfElements = nullptr;
4232 unsigned NumDependencies = std::accumulate(
4233 Dependencies.begin(), Dependencies.end(), 0,
4234 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4235 return D.DepKind == OMPC_DEPEND_depobj
4236 ? V
4237 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4238 });
4239 QualType FlagsTy;
4240 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4241 bool HasDepobjDeps = false;
4242 bool HasRegularWithIterators = false;
4243 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4244 llvm::Value *NumOfRegularWithIterators =
4245 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4246 // Calculate number of depobj dependencies and regular deps with the
4247 // iterators.
4248 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4249 if (D.DepKind == OMPC_DEPEND_depobj) {
4250 SmallVector<llvm::Value *, 4> Sizes =
4251 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4252 for (llvm::Value *Size : Sizes) {
4253 NumOfDepobjElements =
4254 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4255 }
4256 HasDepobjDeps = true;
4257 continue;
4258 }
4259 // Include number of iterations, if any.
4260
4261 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4262 llvm::Value *ClauseIteratorSpace =
4263 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4264 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4265 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4266 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4267 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4268 }
4269 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4270 ClauseIteratorSpace,
4271 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4272 NumOfRegularWithIterators =
4273 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4274 HasRegularWithIterators = true;
4275 continue;
4276 }
4277 }
4278
4279 QualType KmpDependInfoArrayTy;
4280 if (HasDepobjDeps || HasRegularWithIterators) {
4281 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4282 /*isSigned=*/false);
4283 if (HasDepobjDeps) {
4284 NumOfElements =
4285 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4286 }
4287 if (HasRegularWithIterators) {
4288 NumOfElements =
4289 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4290 }
4291 auto *OVE = new (C) OpaqueValueExpr(
4292 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4293 VK_PRValue);
4294 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4295 RValue::get(NumOfElements));
4296 KmpDependInfoArrayTy =
4297 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4298 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4299 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4300 // Properly emit variable-sized array.
4301 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4302 ImplicitParamKind::Other);
4303 CGF.EmitVarDecl(*PD);
4304 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4305 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4306 /*isSigned=*/false);
4307 } else {
4308 KmpDependInfoArrayTy = C.getConstantArrayType(
4309 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4310 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4311 DependenciesArray =
4312 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4313 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4314 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4315 /*isSigned=*/false);
4316 }
4317 unsigned Pos = 0;
4318 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4319 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4320 Dependencies[I].IteratorExpr)
4321 continue;
4322 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4323 DependenciesArray);
4324 }
4325 // Copy regular dependencies with iterators.
4326 LValue PosLVal = CGF.MakeAddrLValue(
4327 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4328 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4329 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4330 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4331 !Dependencies[I].IteratorExpr)
4332 continue;
4333 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4334 DependenciesArray);
4335 }
4336 // Copy final depobj arrays without iterators.
4337 if (HasDepobjDeps) {
4338 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4339 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4340 continue;
4341 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4342 DependenciesArray);
4343 }
4344 }
4345 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4346 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4347 return std::make_pair(NumOfElements, DependenciesArray);
4348}
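// Net effect: a single flat kmp_depend_info array is materialized (a constant
// array when the count is static, a VLA when depobj or iterator clauses make
// it dynamic), filled in three passes (plain deps, iterator deps, then depobj
// copies), and returned together with the total element count for the
// __kmpc_omp_task_with_deps / __kmpc_omp_taskwait_deps_51 calls below.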
4349
4350Address CGOpenMPRuntime::emitDepobjDependClause(
4351 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4352 SourceLocation Loc) {
4353 if (Dependencies.DepExprs.empty())
4354 return Address::invalid();
4355 // Process list of dependencies.
4356 ASTContext &C = CGM.getContext();
4357 Address DependenciesArray = Address::invalid();
4358 unsigned NumDependencies = Dependencies.DepExprs.size();
4359 QualType FlagsTy;
4360 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4361 RecordDecl *KmpDependInfoRD =
4362 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4363
4364 llvm::Value *Size;
4365 // Define type kmp_depend_info[<Dependencies.size()>];
4366 // For depobj reserve one extra element to store the number of elements.
4367 // It is required to handle depobj(x) update(in) construct.
4368 // kmp_depend_info[<Dependencies.size()>] deps;
4369 llvm::Value *NumDepsVal;
4370 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4371 if (const auto *IE =
4372 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4373 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4374 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4375 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4376 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4377 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4378 }
4379 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4380 NumDepsVal);
4381 CharUnits SizeInBytes =
4382 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4383 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4384 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4385 NumDepsVal =
4386 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4387 } else {
4388 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4389 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4390 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4391 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4392 Size = CGM.getSize(Sz.alignTo(Align));
4393 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4394 }
4395 // Need to allocate on the dynamic memory.
4396 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4397 // Use default allocator.
4398 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4399 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4400
4401 llvm::Value *Addr =
4402 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4403 CGM.getModule(), OMPRTL___kmpc_alloc),
4404 Args, ".dep.arr.addr");
4405 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4406 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4407 Addr, KmpDependInfoLlvmTy->getPointerTo());
4408 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4409 // Write number of elements in the first element of array for depobj.
4410 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4411 // deps[i].base_addr = NumDependencies;
4412 LValue BaseAddrLVal = CGF.EmitLValueForField(
4413 Base,
4414 *std::next(KmpDependInfoRD->field_begin(),
4415 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4416 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4417 llvm::PointerUnion<unsigned *, LValue *> Pos;
4418 unsigned Idx = 1;
4419 LValue PosLVal;
4420 if (Dependencies.IteratorExpr) {
4421 PosLVal = CGF.MakeAddrLValue(
4422 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4423 C.getSizeType());
4424 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4425 /*IsInit=*/true);
4426 Pos = &PosLVal;
4427 } else {
4428 Pos = &Idx;
4429 }
4430 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4431 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4432 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4433 CGF.Int8Ty);
4434 return DependenciesArray;
4435}
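// Note the layout convention used above: element 0 of the allocated array
// stores the dependency count in its base_addr field, and the pointer handed
// back for the depobj variable addresses element 1, which is how
// getDepobjElements() can later recover the count by stepping one element
// backwards.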
4436
4437void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4438 SourceLocation Loc) {
4439 ASTContext &C = CGM.getContext();
4440 QualType FlagsTy;
4441 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4442 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4443 C.VoidPtrTy.castAs<PointerType>());
4444 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4445 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4446 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4447 CGF.ConvertTypeForMem(KmpDependInfoTy));
4448 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4449 Addr.getElementType(), Addr.emitRawPointer(CGF),
4450 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4451 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4452 CGF.VoidPtrTy);
4453 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4454 // Use default allocator.
4455 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4456 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4457
4458 // _kmpc_free(gtid, addr, nullptr);
4459 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4460 CGM.getModule(), OMPRTL___kmpc_free),
4461 Args);
4462}
4463
4464void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4465 OpenMPDependClauseKind NewDepKind,
4466 SourceLocation Loc) {
4467 ASTContext &C = CGM.getContext();
4468 QualType FlagsTy;
4469 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4470 RecordDecl *KmpDependInfoRD =
4471 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4472 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4473 llvm::Value *NumDeps;
4474 LValue Base;
4475 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4476
4477 Address Begin = Base.getAddress();
4478 // Cast from pointer to array type to pointer to single element.
4479 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4480 Begin.emitRawPointer(CGF), NumDeps);
4481 // The basic structure here is a while-do loop.
4482 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4483 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4484 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4485 CGF.EmitBlock(BodyBB);
4486 llvm::PHINode *ElementPHI =
4487 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4488 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4489 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4490 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4491 Base.getTBAAInfo());
4492 // deps[i].flags = NewDepKind;
4493 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4494 LValue FlagsLVal = CGF.EmitLValueForField(
4495 Base, *std::next(KmpDependInfoRD->field_begin(),
4496 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4497 CGF.EmitStoreOfScalar(
4498 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4499 FlagsLVal);
4500
4501 // Shift the address forward by one element.
4502 llvm::Value *ElementNext =
4503 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4504 .emitRawPointer(CGF);
4505 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4506 llvm::Value *IsEmpty =
4507 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4508 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4509 // Done.
4510 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4511}
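// The loop above rewrites only the flags field of every kmp_depend_info
// element reachable from the depobj, leaving base_addr and len intact, which
// is exactly what 'depobj(x) update(<kind>)' requires.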
4512
4513void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4514 const OMPExecutableDirective &D,
4515 llvm::Function *TaskFunction,
4516 QualType SharedsTy, Address Shareds,
4517 const Expr *IfCond,
4518 const OMPTaskDataTy &Data) {
4519 if (!CGF.HaveInsertPoint())
4520 return;
4521
4522 TaskResultTy Result =
4523 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4524 llvm::Value *NewTask = Result.NewTask;
4525 llvm::Function *TaskEntry = Result.TaskEntry;
4526 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4527 LValue TDBase = Result.TDBase;
4528 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4529 // Process list of dependences.
4530 Address DependenciesArray = Address::invalid();
4531 llvm::Value *NumOfElements;
4532 std::tie(NumOfElements, DependenciesArray) =
4533 emitDependClause(CGF, Data.Dependences, Loc);
4534
4535 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4536 // libcall.
4537 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4538 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4539 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4540 // list is not empty
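  // Editor's illustration (not from the original source): for a directive like
  //   #pragma omp task depend(in : x) depend(out : y)
  // the dependences are packed into a kmp_depend_info array and the 'then'
  // branch below emits, roughly:
  //   __kmpc_omp_task_with_deps(&loc, gtid, new_task, /*ndeps=*/2, dep_list,
  //                             /*ndeps_noalias=*/0, /*noalias_dep_list=*/NULL);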
4541 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4542 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4543 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4544 llvm::Value *DepTaskArgs[7];
4545 if (!Data.Dependences.empty()) {
4546 DepTaskArgs[0] = UpLoc;
4547 DepTaskArgs[1] = ThreadID;
4548 DepTaskArgs[2] = NewTask;
4549 DepTaskArgs[3] = NumOfElements;
4550 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4551 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4552 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4553 }
4554 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4555 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4556 if (!Data.Tied) {
4557 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4558 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4559 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4560 }
4561 if (!Data.Dependences.empty()) {
4562 CGF.EmitRuntimeCall(
4563 OMPBuilder.getOrCreateRuntimeFunction(
4564 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4565 DepTaskArgs);
4566 } else {
4567 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4568 CGM.getModule(), OMPRTL___kmpc_omp_task),
4569 TaskArgs);
4570 }
4571 // Check if the parent region is untied and build a return for the untied task.
4572 if (auto *Region =
4573 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4574 Region->emitUntiedSwitch(CGF);
4575 };
4576
4577 llvm::Value *DepWaitTaskArgs[7];
4578 if (!Data.Dependences.empty()) {
4579 DepWaitTaskArgs[0] = UpLoc;
4580 DepWaitTaskArgs[1] = ThreadID;
4581 DepWaitTaskArgs[2] = NumOfElements;
4582 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4583 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4584 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4585 DepWaitTaskArgs[6] =
4586 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4587 }
4588 auto &M = CGM.getModule();
4589 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4590 TaskEntry, &Data, &DepWaitTaskArgs,
4591 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4592 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4593 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4594 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4595 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4596 // is specified.
4597 if (!Data.Dependences.empty())
4598 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4599 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4600 DepWaitTaskArgs);
4601 // Call proxy_task_entry(gtid, new_task);
4602 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4603 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4604 Action.Enter(CGF);
4605 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4606 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4607 OutlinedFnArgs);
4608 };
4609
4610 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4611 // kmp_task_t *new_task);
4612 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4613 // kmp_task_t *new_task);
4614 RegionCodeGenTy RCG(CodeGen);
4615 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4616 M, OMPRTL___kmpc_omp_task_begin_if0),
4617 TaskArgs,
4618 OMPBuilder.getOrCreateRuntimeFunction(
4619 M, OMPRTL___kmpc_omp_task_complete_if0),
4620 TaskArgs);
4621 RCG.setAction(Action);
4622 RCG(CGF);
4623 };
4624
4625 if (IfCond) {
4626 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4627 } else {
4628 RegionCodeGenTy ThenRCG(ThenCodeGen);
4629 ThenRCG(CGF);
4630 }
4631}
4632
4633void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4634 const OMPLoopDirective &D,
4635 llvm::Function *TaskFunction,
4636 QualType SharedsTy, Address Shareds,
4637 const Expr *IfCond,
4638 const OMPTaskDataTy &Data) {
4639 if (!CGF.HaveInsertPoint())
4640 return;
4641 TaskResultTy Result =
4642 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4643 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4644 // libcall.
4645 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4646 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4647 // sched, kmp_uint64 grainsize, void *task_dup);
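  // Editor's illustration (not from the original source): for
  //   #pragma omp taskloop grainsize(4)
  //   for (int i = 0; i < n; ++i) body(i);
  // the call built below is, roughly:
  //   __kmpc_taskloop(&loc, gtid, new_task, /*if_val=*/1, &lb, &ub, st,
  //                   /*nogroup=*/1, /*sched=*/1 /*Grainsize*/, /*grainsize=*/4,
  //                   /*task_dup=*/NULL);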
4648 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4649 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4650 llvm::Value *IfVal;
4651 if (IfCond) {
4652 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4653 /*isSigned=*/true);
4654 } else {
4655 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4656 }
4657
4658 LValue LBLVal = CGF.EmitLValueForField(
4659 Result.TDBase,
4660 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4661 const auto *LBVar =
4662 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4663 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4664 /*IsInitializer=*/true);
4665 LValue UBLVal = CGF.EmitLValueForField(
4666 Result.TDBase,
4667 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4668 const auto *UBVar =
4669 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4670 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4671 /*IsInitializer=*/true);
4672 LValue StLVal = CGF.EmitLValueForField(
4673 Result.TDBase,
4674 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4675 const auto *StVar =
4676 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4677 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4678 /*IsInitializer=*/true);
4679 // Store reductions address.
4680 LValue RedLVal = CGF.EmitLValueForField(
4681 Result.TDBase,
4682 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4683 if (Data.Reductions) {
4684 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4685 } else {
4686 CGF.EmitNullInitialization(RedLVal.getAddress(),
4687 CGF.getContext().VoidPtrTy);
4688 }
4689 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4690 llvm::Value *TaskArgs[] = {
4691 UpLoc,
4692 ThreadID,
4693 Result.NewTask,
4694 IfVal,
4695 LBLVal.getPointer(CGF),
4696 UBLVal.getPointer(CGF),
4697 CGF.EmitLoadOfScalar(StLVal, Loc),
4698 llvm::ConstantInt::getSigned(
4699 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4700 llvm::ConstantInt::getSigned(
4701 CGF.IntTy, Data.Schedule.getPointer()
4702 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4703 : NoSchedule),
4704 Data.Schedule.getPointer()
4705 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4706 /*isSigned=*/false)
4707 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4708 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4709 Result.TaskDupFn, CGF.VoidPtrTy)
4710 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4711 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4712 CGM.getModule(), OMPRTL___kmpc_taskloop),
4713 TaskArgs);
4714}
4715
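// Editor's note (not from the original source): the helper below implements
// element-wise reduction over array sections, e.g. for
//   #pragma omp parallel for reduction(+ : a[0:n])
// it walks the private and original arrays in lockstep and applies the
// combiner "LHS op= RHS" to each element pair.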
4716/// Emit reduction operation for each element of array (required for
4717/// array sections) LHS op = RHS.
4718/// \param Type Type of array.
4719/// \param LHSVar Variable on the left side of the reduction operation
4720/// (references element of array in original variable).
4721/// \param RHSVar Variable on the right side of the reduction operation
4722/// (references element of array in original variable).
4723/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4724/// RHSVar.
4725static void EmitOMPAggregateReduction(
4726 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4727 const VarDecl *RHSVar,
4728 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4729 const Expr *, const Expr *)> &RedOpGen,
4730 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4731 const Expr *UpExpr = nullptr) {
4732 // Perform element-by-element initialization.
4733 QualType ElementTy;
4734 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4735 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4736
4737 // Drill down to the base element type on both arrays.
4738 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4739 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4740
4741 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4742 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4743 // Cast from pointer to array type to pointer to single element.
4744 llvm::Value *LHSEnd =
4745 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4746 // The basic structure here is a while-do loop.
4747 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4748 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4749 llvm::Value *IsEmpty =
4750 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4751 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4752
4753 // Enter the loop body, making that address the current address.
4754 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4755 CGF.EmitBlock(BodyBB);
4756
4757 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4758
4759 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4760 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4761 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4762 Address RHSElementCurrent(
4763 RHSElementPHI, RHSAddr.getElementType(),
4764 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4765
4766 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4767 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4768 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4769 Address LHSElementCurrent(
4770 LHSElementPHI, LHSAddr.getElementType(),
4771 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4772
4773 // Emit copy.
4774 CodeGenFunction::OMPPrivateScope Scope(CGF);
4775 Scope.addPrivate(LHSVar, LHSElementCurrent);
4776 Scope.addPrivate(RHSVar, RHSElementCurrent);
4777 Scope.Privatize();
4778 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4779 Scope.ForceCleanup();
4780
4781 // Shift the address forward by one element.
4782 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4783 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4784 "omp.arraycpy.dest.element");
4785 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4786 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4787 "omp.arraycpy.src.element");
4788 // Check whether we've reached the end.
4789 llvm::Value *Done =
4790 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4791 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4792 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4793 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4794
4795 // Done.
4796 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4797}
4798
4799/// Emit reduction combiner. If the combiner is a simple expression emit it as
4800/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4801/// UDR combiner function.
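/// For example, given a user-defined reduction
/// \code
/// #pragma omp declare reduction(merge : MyTy : omp_out.append(omp_in))
/// \endcode
/// the ReductionOp of a 'reduction(merge : v)' clause is a call through an
/// OpaqueValueExpr, which is remapped here to the UDR combiner function.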
4802static void emitReductionCombiner(CodeGenFunction &CGF,
4803 const Expr *ReductionOp) {
4804 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4805 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4806 if (const auto *DRE =
4807 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4808 if (const auto *DRD =
4809 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4810 std::pair<llvm::Function *, llvm::Function *> Reduction =
4811 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4812 RValue Func = RValue::get(Reduction.first);
4813 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4814 CGF.EmitIgnoredExpr(ReductionOp);
4815 return;
4816 }
4817 CGF.EmitIgnoredExpr(ReductionOp);
4818}
4819
4820llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4821 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4822 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4823 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4824 ASTContext &C = CGM.getContext();
4825
4826 // void reduction_func(void *LHSArg, void *RHSArg);
4827 FunctionArgList Args;
4828 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4829 ImplicitParamKind::Other);
4830 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4831 ImplicitParamKind::Other);
4832 Args.push_back(&LHSArg);
4833 Args.push_back(&RHSArg);
4834 const auto &CGFI =
4835 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4836 std::string Name = getReductionFuncName(ReducerName);
4837 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4838 llvm::GlobalValue::InternalLinkage, Name,
4839 &CGM.getModule());
4840 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4841 Fn->setDoesNotRecurse();
4842 CodeGenFunction CGF(CGM);
4843 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4844
4845 // Dst = (void*[n])(LHSArg);
4846 // Src = (void*[n])(RHSArg);
4847 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4848 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4849 ArgsElemType->getPointerTo()),
4850 ArgsElemType, CGF.getPointerAlign());
4851 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4852 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4853 ArgsElemType->getPointerTo()),
4854 ArgsElemType, CGF.getPointerAlign());
4855
4856 // ...
4857 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4858 // ...
4859 CodeGenFunction::OMPPrivateScope Scope(CGF);
4860 const auto *IPriv = Privates.begin();
4861 unsigned Idx = 0;
4862 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4863 const auto *RHSVar =
4864 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4865 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4866 const auto *LHSVar =
4867 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4868 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4869 QualType PrivTy = (*IPriv)->getType();
4870 if (PrivTy->isVariablyModifiedType()) {
4871 // Get array size and emit VLA type.
4872 ++Idx;
4873 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4874 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4875 const VariableArrayType *VLA =
4876 CGF.getContext().getAsVariableArrayType(PrivTy);
4877 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4878 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4879 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4880 CGF.EmitVariablyModifiedType(PrivTy);
4881 }
4882 }
4883 Scope.Privatize();
4884 IPriv = Privates.begin();
4885 const auto *ILHS = LHSExprs.begin();
4886 const auto *IRHS = RHSExprs.begin();
4887 for (const Expr *E : ReductionOps) {
4888 if ((*IPriv)->getType()->isArrayType()) {
4889 // Emit reduction for array section.
4890 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4891 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4892 EmitOMPAggregateReduction(
4893 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4894 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4895 emitReductionCombiner(CGF, E);
4896 });
4897 } else {
4898 // Emit reduction for array subscript or single variable.
4899 emitReductionCombiner(CGF, E);
4900 }
4901 ++IPriv;
4902 ++ILHS;
4903 ++IRHS;
4904 }
4905 Scope.ForceCleanup();
4906 CGF.FinishFunction();
4907 return Fn;
4908}
4909
4910void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4911 const Expr *ReductionOp,
4912 const Expr *PrivateRef,
4913 const DeclRefExpr *LHS,
4914 const DeclRefExpr *RHS) {
4915 if (PrivateRef->getType()->isArrayType()) {
4916 // Emit reduction for array section.
4917 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4918 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4919 EmitOMPAggregateReduction(
4920 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4921 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4922 emitReductionCombiner(CGF, ReductionOp);
4923 });
4924 } else {
4925 // Emit reduction for array subscript or single variable.
4926 emitReductionCombiner(CGF, ReductionOp);
4927 }
4928}
4929
4930void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4931 ArrayRef<const Expr *> Privates,
4932 ArrayRef<const Expr *> LHSExprs,
4933 ArrayRef<const Expr *> RHSExprs,
4934 ArrayRef<const Expr *> ReductionOps,
4935 ReductionOptionsTy Options) {
4936 if (!CGF.HaveInsertPoint())
4937 return;
4938
4939 bool WithNowait = Options.WithNowait;
4940 bool SimpleReduction = Options.SimpleReduction;
4941
4942 // The following code should be emitted for the reduction:
4943 //
4944 // static kmp_critical_name lock = { 0 };
4945 //
4946 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4947 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4948 // ...
4949 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4950 // *(Type<n>-1*)rhs[<n>-1]);
4951 // }
4952 //
4953 // ...
4954 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4955 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4956 // RedList, reduce_func, &<lock>)) {
4957 // case 1:
4958 // ...
4959 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4960 // ...
4961 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4962 // break;
4963 // case 2:
4964 // ...
4965 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4966 // ...
4967 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4968 // break;
4969 // default:;
4970 // }
4971 //
4972 // If SimpleReduction is true, only the following code is generated:
4973 // ...
4974 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4975 // ...
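  // Editor's illustration (not from the original source): for
  //   #pragma omp parallel for reduction(+ : sum)
  // RedList holds &sum_private; case 1 merges via 'sum = sum + sum_private'
  // under the runtime-provided lock, and case 2 performs the merge with an
  // atomic update per reduction item.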
4976
4977 ASTContext &C = CGM.getContext();
4978
4979 if (SimpleReduction) {
4980 CodeGenFunction::RunCleanupsScope Scope(CGF);
4981 const auto *IPriv = Privates.begin();
4982 const auto *ILHS = LHSExprs.begin();
4983 const auto *IRHS = RHSExprs.begin();
4984 for (const Expr *E : ReductionOps) {
4985 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4986 cast<DeclRefExpr>(*IRHS));
4987 ++IPriv;
4988 ++ILHS;
4989 ++IRHS;
4990 }
4991 return;
4992 }
4993
4994 // 1. Build a list of reduction variables.
4995 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4996 auto Size = RHSExprs.size();
4997 for (const Expr *E : Privates) {
4998 if (E->getType()->isVariablyModifiedType())
4999 // Reserve place for array size.
5000 ++Size;
5001 }
5002 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5003 QualType ReductionArrayTy = C.getConstantArrayType(
5004 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5005 /*IndexTypeQuals=*/0);
5006 RawAddress ReductionList =
5007 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5008 const auto *IPriv = Privates.begin();
5009 unsigned Idx = 0;
5010 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5011 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5012 CGF.Builder.CreateStore(
5013 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5014 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5015 Elem);
5016 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5017 // Store array size.
5018 ++Idx;
5019 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5020 llvm::Value *Size = CGF.Builder.CreateIntCast(
5021 CGF.getVLASize(
5022 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5023 .NumElts,
5024 CGF.SizeTy, /*isSigned=*/false);
5025 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5026 Elem);
5027 }
5028 }
5029
5030 // 2. Emit reduce_func().
5031 llvm::Function *ReductionFn = emitReductionFunction(
5032 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5033 Privates, LHSExprs, RHSExprs, ReductionOps);
5034
5035 // 3. Create static kmp_critical_name lock = { 0 };
5036 std::string Name = getName({"reduction"});
5037 llvm::Value *Lock = getCriticalRegionLock(Name);
5038
5039 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5040 // RedList, reduce_func, &<lock>);
5041 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5042 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5043 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5044 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5045 ReductionList.getPointer(), CGF.VoidPtrTy);
5046 llvm::Value *Args[] = {
5047 IdentTLoc, // ident_t *<loc>
5048 ThreadId, // i32 <gtid>
5049 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5050 ReductionArrayTySize, // size_type sizeof(RedList)
5051 RL, // void *RedList
5052 ReductionFn, // void (*) (void *, void *) <reduce_func>
5053 Lock // kmp_critical_name *&<lock>
5054 };
5055 llvm::Value *Res = CGF.EmitRuntimeCall(
5056 OMPBuilder.getOrCreateRuntimeFunction(
5057 CGM.getModule(),
5058 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5059 Args);
5060
5061 // 5. Build switch(res)
5062 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5063 llvm::SwitchInst *SwInst =
5064 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5065
5066 // 6. Build case 1:
5067 // ...
5068 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5069 // ...
5070 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5071 // break;
5072 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5073 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5074 CGF.EmitBlock(Case1BB);
5075
5076 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5077 llvm::Value *EndArgs[] = {
5078 IdentTLoc, // ident_t *<loc>
5079 ThreadId, // i32 <gtid>
5080 Lock // kmp_critical_name *&<lock>
5081 };
5082 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5083 CodeGenFunction &CGF, PrePostActionTy &Action) {
5084 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5085 const auto *IPriv = Privates.begin();
5086 const auto *ILHS = LHSExprs.begin();
5087 const auto *IRHS = RHSExprs.begin();
5088 for (const Expr *E : ReductionOps) {
5089 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5090 cast<DeclRefExpr>(*IRHS));
5091 ++IPriv;
5092 ++ILHS;
5093 ++IRHS;
5094 }
5095 };
5096 RegionCodeGenTy RCG(CodeGen);
5097 CommonActionTy Action(
5098 nullptr, std::nullopt,
5099 OMPBuilder.getOrCreateRuntimeFunction(
5100 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5101 : OMPRTL___kmpc_end_reduce),
5102 EndArgs);
5103 RCG.setAction(Action);
5104 RCG(CGF);
5105
5106 CGF.EmitBranch(DefaultBB);
5107
5108 // 7. Build case 2:
5109 // ...
5110 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5111 // ...
5112 // break;
5113 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5114 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5115 CGF.EmitBlock(Case2BB);
5116
5117 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5118 CodeGenFunction &CGF, PrePostActionTy &Action) {
5119 const auto *ILHS = LHSExprs.begin();
5120 const auto *IRHS = RHSExprs.begin();
5121 const auto *IPriv = Privates.begin();
5122 for (const Expr *E : ReductionOps) {
5123 const Expr *XExpr = nullptr;
5124 const Expr *EExpr = nullptr;
5125 const Expr *UpExpr = nullptr;
5126 BinaryOperatorKind BO = BO_Comma;
5127 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5128 if (BO->getOpcode() == BO_Assign) {
5129 XExpr = BO->getLHS();
5130 UpExpr = BO->getRHS();
5131 }
5132 }
5133 // Try to emit update expression as a simple atomic.
5134 const Expr *RHSExpr = UpExpr;
5135 if (RHSExpr) {
5136 // Analyze RHS part of the whole expression.
5137 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5138 RHSExpr->IgnoreParenImpCasts())) {
5139 // If this is a conditional operator, analyze its condition for
5140 // min/max reduction operator.
5141 RHSExpr = ACO->getCond();
5142 }
5143 if (const auto *BORHS =
5144 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5145 EExpr = BORHS->getRHS();
5146 BO = BORHS->getOpcode();
5147 }
5148 }
5149 if (XExpr) {
5150 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5151 auto &&AtomicRedGen = [BO, VD,
5152 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5153 const Expr *EExpr, const Expr *UpExpr) {
5154 LValue X = CGF.EmitLValue(XExpr);
5155 RValue E;
5156 if (EExpr)
5157 E = CGF.EmitAnyExpr(EExpr);
5158 CGF.EmitOMPAtomicSimpleUpdateExpr(
5159 X, E, BO, /*IsXLHSInRHSPart=*/true,
5160 llvm::AtomicOrdering::Monotonic, Loc,
5161 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5162 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5163 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5164 CGF.emitOMPSimpleStore(
5165 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5166 VD->getType().getNonReferenceType(), Loc);
5167 PrivateScope.addPrivate(VD, LHSTemp);
5168 (void)PrivateScope.Privatize();
5169 return CGF.EmitAnyExpr(UpExpr);
5170 });
5171 };
5172 if ((*IPriv)->getType()->isArrayType()) {
5173 // Emit atomic reduction for array section.
5174 const auto *RHSVar =
5175 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5176 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5177 AtomicRedGen, XExpr, EExpr, UpExpr);
5178 } else {
5179 // Emit atomic reduction for array subscript or single variable.
5180 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5181 }
5182 } else {
5183 // Emit as a critical region.
5184 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5185 const Expr *, const Expr *) {
5186 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5187 std::string Name = RT.getName({"atomic_reduction"});
5188 RT.emitCriticalRegion(
5189 CGF, Name,
5190 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5191 Action.Enter(CGF);
5192 emitReductionCombiner(CGF, E);
5193 },
5194 Loc);
5195 };
5196 if ((*IPriv)->getType()->isArrayType()) {
5197 const auto *LHSVar =
5198 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5199 const auto *RHSVar =
5200 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5201 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5202 CritRedGen);
5203 } else {
5204 CritRedGen(CGF, nullptr, nullptr, nullptr);
5205 }
5206 }
5207 ++ILHS;
5208 ++IRHS;
5209 ++IPriv;
5210 }
5211 };
5212 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5213 if (!WithNowait) {
5214 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5215 llvm::Value *EndArgs[] = {
5216 IdentTLoc, // ident_t *<loc>
5217 ThreadId, // i32 <gtid>
5218 Lock // kmp_critical_name *&<lock>
5219 };
5220 CommonActionTy Action(nullptr, std::nullopt,
5221 OMPBuilder.getOrCreateRuntimeFunction(
5222 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5223 EndArgs);
5224 AtomicRCG.setAction(Action);
5225 AtomicRCG(CGF);
5226 } else {
5227 AtomicRCG(CGF);
5228 }
5229
5230 CGF.EmitBranch(DefaultBB);
5231 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5232}
5233
5234/// Generates unique name for artificial threadprivate variables.
5235/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
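/// For example, with prefix "reduction_size" and a variable 'x' this yields a
/// name like "reduction_size.x_4242", where the trailing number is the raw
/// encoding of the declaration's start location (the value shown here is
/// illustrative only).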
5236static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5237 const Expr *Ref) {
5238 SmallString<256> Buffer;
5239 llvm::raw_svector_ostream Out(Buffer);
5240 const clang::DeclRefExpr *DE;
5241 const VarDecl *D = ::getBaseDecl(Ref, DE);
5242 if (!D)
5243 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5244 D = D->getCanonicalDecl();
5245 std::string Name = CGM.getOpenMPRuntime().getName(
5246 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5247 Out << Prefix << Name << "_"
5248 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5249 return std::string(Out.str());
5250}
5251
5252/// Emits reduction initializer function:
5253/// \code
5254/// void @.red_init(void* %arg, void* %orig) {
5255/// %0 = bitcast void* %arg to <type>*
5256/// store <type> <init>, <type>* %0
5257/// ret void
5258/// }
5259/// \endcode
5260static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5261 SourceLocation Loc,
5262 ReductionCodeGen &RCG, unsigned N) {
5263 ASTContext &C = CGM.getContext();
5264 QualType VoidPtrTy = C.VoidPtrTy;
5265 VoidPtrTy.addRestrict();
5266 FunctionArgList Args;
5267 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5268 ImplicitParamKind::Other);
5269 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5270 ImplicitParamKind::Other);
5271 Args.emplace_back(&Param);
5272 Args.emplace_back(&ParamOrig);
5273 const auto &FnInfo =
5274 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5275 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5276 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5277 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5278 Name, &CGM.getModule());
5279 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5280 Fn->setDoesNotRecurse();
5281 CodeGenFunction CGF(CGM);
5282 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5283 QualType PrivateType = RCG.getPrivateType(N);
5284 Address PrivateAddr = CGF.EmitLoadOfPointer(
5285 CGF.GetAddrOfLocalVar(&Param).withElementType(
5286 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5287 C.getPointerType(PrivateType)->castAs<PointerType>());
5288 llvm::Value *Size = nullptr;
5289 // If the size of the reduction item is non-constant, load it from global
5290 // threadprivate variable.
5291 if (RCG.getSizes(N).second) {
5292 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5293 CGF, CGM.getContext().getSizeType(),
5294 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5295 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5296 CGM.getContext().getSizeType(), Loc);
5297 }
5298 RCG.emitAggregateType(CGF, N, Size);
5299 Address OrigAddr = Address::invalid();
5300 // If the initializer uses the initializer from a declare reduction
5301 // construct, emit a pointer to the address of the original reduction item
5302 // (required by the reduction initializer).
5303 if (RCG.usesReductionInitializer(N)) {
5304 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5305 OrigAddr = CGF.EmitLoadOfPointer(
5306 SharedAddr,
5307 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5308 }
5309 // Emit the initializer:
5310 // %0 = bitcast void* %arg to <type>*
5311 // store <type> <init>, <type>* %0
5312 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5313 [](CodeGenFunction &) { return false; });
5314 CGF.FinishFunction();
5315 return Fn;
5316}
5317
5318/// Emits reduction combiner function:
5319/// \code
5320/// void @.red_comb(void* %arg0, void* %arg1) {
5321/// %lhs = bitcast void* %arg0 to <type>*
5322/// %rhs = bitcast void* %arg1 to <type>*
5323/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5324/// store <type> %2, <type>* %lhs
5325/// ret void
5326/// }
5327/// \endcode
5328static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5329 SourceLocation Loc,
5330 ReductionCodeGen &RCG, unsigned N,
5331 const Expr *ReductionOp,
5332 const Expr *LHS, const Expr *RHS,
5333 const Expr *PrivateRef) {
5334 ASTContext &C = CGM.getContext();
5335 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5336 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5337 FunctionArgList Args;
5338 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5339 C.VoidPtrTy, ImplicitParamKind::Other);
5340 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5341 ImplicitParamKind::Other);
5342 Args.emplace_back(&ParamInOut);
5343 Args.emplace_back(&ParamIn);
5344 const auto &FnInfo =
5345 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5346 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5347 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5348 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5349 Name, &CGM.getModule());
5350 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5351 Fn->setDoesNotRecurse();
5352 CodeGenFunction CGF(CGM);
5353 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5354 llvm::Value *Size = nullptr;
5355 // If the size of the reduction item is non-constant, load it from global
5356 // threadprivate variable.
5357 if (RCG.getSizes(N).second) {
5358 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5359 CGF, CGM.getContext().getSizeType(),
5360 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5361 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5362 CGM.getContext().getSizeType(), Loc);
5363 }
5364 RCG.emitAggregateType(CGF, N, Size);
5365 // Remap lhs and rhs variables to the addresses of the function arguments.
5366 // %lhs = bitcast void* %arg0 to <type>*
5367 // %rhs = bitcast void* %arg1 to <type>*
5368 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5369 PrivateScope.addPrivate(
5370 LHSVD,
5371 // Pull out the pointer to the variable.
5372 CGF.EmitLoadOfPointer(
5373 CGF.GetAddrOfLocalVar(&ParamInOut)
5374 .withElementType(
5375 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5376 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5377 PrivateScope.addPrivate(
5378 RHSVD,
5379 // Pull out the pointer to the variable.
5380 CGF.EmitLoadOfPointer(
5381 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5382 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5383 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5384 PrivateScope.Privatize();
5385 // Emit the combiner body:
5386 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5387 // store <type> %2, <type>* %lhs
5388 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5389 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5390 cast<DeclRefExpr>(RHS));
5391 CGF.FinishFunction();
5392 return Fn;
5393}
5394
5395/// Emits reduction finalizer function:
5396/// \code
5397/// void @.red_fini(void* %arg) {
5398/// %0 = bitcast void* %arg to <type>*
5399/// <destroy>(<type>* %0)
5400/// ret void
5401/// }
5402/// \endcode
5403static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5404 SourceLocation Loc,
5405 ReductionCodeGen &RCG, unsigned N) {
5406 if (!RCG.needCleanups(N))
5407 return nullptr;
5408 ASTContext &C = CGM.getContext();
5409 FunctionArgList Args;
5410 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5411 ImplicitParamKind::Other);
5412 Args.emplace_back(&Param);
5413 const auto &FnInfo =
5414 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5415 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5416 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5417 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5418 Name, &CGM.getModule());
5419 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5420 Fn->setDoesNotRecurse();
5421 CodeGenFunction CGF(CGM);
5422 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5423 Address PrivateAddr = CGF.EmitLoadOfPointer(
5424 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5425 llvm::Value *Size = nullptr;
5426 // If the size of the reduction item is non-constant, load it from global
5427 // threadprivate variable.
5428 if (RCG.getSizes(N).second) {
5429 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5430 CGF, CGM.getContext().getSizeType(),
5431 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5432 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5433 CGM.getContext().getSizeType(), Loc);
5434 }
5435 RCG.emitAggregateType(CGF, N, Size);
5436 // Emit the finalizer body:
5437 // <destroy>(<type>* %0)
5438 RCG.emitCleanups(CGF, N, PrivateAddr);
5439 CGF.FinishFunction(Loc);
5440 return Fn;
5441}
5442
5443llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5444 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5445 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5446 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5447 return nullptr;
5448
5449 // Build typedef struct:
5450 // kmp_taskred_input {
5451 // void *reduce_shar; // shared reduction item
5452 // void *reduce_orig; // original reduction item used for initialization
5453 // size_t reduce_size; // size of data item
5454 // void *reduce_init; // data initialization routine
5455 // void *reduce_fini; // data finalization routine
5456 // void *reduce_comb; // data combiner routine
5457 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5458 // } kmp_taskred_input_t;
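  // Editor's illustration (not from the original source): for
  //   #pragma omp taskgroup task_reduction(+ : sum)
  // a single kmp_taskred_input_t entry is filled with &sum, sizeof(sum) and
  // the generated .red_init./.red_comb./.red_fini. thunks, and the array is
  // then passed to __kmpc_taskred_init or __kmpc_taskred_modifier_init below.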
5459 ASTContext &C = CGM.getContext();
5460 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5461 RD->startDefinition();
5462 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5463 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5464 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5465 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5466 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5467 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5468 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5469 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5470 RD->completeDefinition();
5471 QualType RDType = C.getRecordType(RD);
5472 unsigned Size = Data.ReductionVars.size();
5473 llvm::APInt ArraySize(/*numBits=*/64, Size);
5474 QualType ArrayRDType =
5475 C.getConstantArrayType(RDType, ArraySize, nullptr,
5476 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5477 // kmp_task_red_input_t .rd_input.[Size];
5478 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5479 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5480 Data.ReductionCopies, Data.ReductionOps);
5481 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5482 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5483 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5484 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5485 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5486 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5487 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5488 ".rd_input.gep.");
5489 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5490 // ElemLVal.reduce_shar = &Shareds[Cnt];
5491 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5492 RCG.emitSharedOrigLValue(CGF, Cnt);
5493 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5494 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5495 // ElemLVal.reduce_orig = &Origs[Cnt];
5496 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5497 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5498 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5499 RCG.emitAggregateType(CGF, Cnt);
5500 llvm::Value *SizeValInChars;
5501 llvm::Value *SizeVal;
5502 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5503 // We use delayed creation/initialization for VLAs and array sections. It is
5504 // required because the runtime does not provide a way to pass the sizes of
5505 // VLAs/array sections to the initializer/combiner/finalizer functions.
5506 // Instead, threadprivate global variables are used to store these values
5507 // and make them available to those functions.
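    // Editor's note (not from the original source): e.g. for
    //   task_reduction(+ : vla[0:n])
    // the byte size is only known at run time, so DelayedCreation becomes
    // true, 'flags' is set to 1 below, and emitTaskReductionFixups later
    // stores the size into the "reduction_size" threadprivate variable.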
5508 bool DelayedCreation = !!SizeVal;
5509 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5510 /*isSigned=*/false);
5511 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5512 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5513 // ElemLVal.reduce_init = init;
5514 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5515 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5516 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5517 // ElemLVal.reduce_fini = fini;
5518 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5519 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5520 llvm::Value *FiniAddr =
5521 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5522 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5523 // ElemLVal.reduce_comb = comb;
5524 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5525 llvm::Value *CombAddr = emitReduceCombFunction(
5526 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5527 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5528 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5529 // ElemLVal.flags = 0;
5530 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5531 if (DelayedCreation) {
5532 CGF.EmitStoreOfScalar(
5533 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5534 FlagsLVal);
5535 } else
5536 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5537 }
5538 if (Data.IsReductionWithTaskMod) {
5539 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5540 // is_ws, int num, void *data);
5541 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5542 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5543 CGM.IntTy, /*isSigned=*/true);
5544 llvm::Value *Args[] = {
5545 IdentTLoc, GTid,
5546 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5547 /*isSigned=*/true),
5548 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5549 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5550 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5551 return CGF.EmitRuntimeCall(
5552 OMPBuilder.getOrCreateRuntimeFunction(
5553 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5554 Args);
5555 }
5556 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5557 llvm::Value *Args[] = {
5558 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5559 /*isSigned=*/true),
5560 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5561 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5562 CGM.VoidPtrTy)};
5563 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5564 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5565 Args);
5566}
5567
5568void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5569 SourceLocation Loc,
5570 bool IsWorksharingReduction) {
5571 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5572 // is_ws, int num, void *data);
5573 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5574 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5575 CGM.IntTy, /*isSigned=*/true);
5576 llvm::Value *Args[] = {IdentTLoc, GTid,
5577 llvm::ConstantInt::get(CGM.IntTy,
5578 IsWorksharingReduction ? 1 : 0,
5579 /*isSigned=*/true)};
5580 (void)CGF.EmitRuntimeCall(
5581 OMPBuilder.getOrCreateRuntimeFunction(
5582 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5583 Args);
5584}
5585
5586void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5587 SourceLocation Loc,
5588 ReductionCodeGen &RCG,
5589 unsigned N) {
5590 auto Sizes = RCG.getSizes(N);
5591 // Emit the threadprivate global variable if the size of the reduction item
5592 // type is non-constant (i.e. Sizes.second != nullptr).
5593 if (Sizes.second) {
5594 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5595 /*isSigned=*/false);
5596 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5597 CGF, CGM.getContext().getSizeType(),
5598 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5599 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5600 }
5601}
5602
5603Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5604 SourceLocation Loc,
5605 llvm::Value *ReductionsPtr,
5606 LValue SharedLVal) {
5607 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5608 // *d);
5609 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5610 CGM.IntTy,
5611 /*isSigned=*/true),
5612 ReductionsPtr,
5613 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5614 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5615 return Address(
5616 CGF.EmitRuntimeCall(
5617 OMPBuilder.getOrCreateRuntimeFunction(
5618 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5619 Args),
5620 CGF.Int8Ty, SharedLVal.getAlignment());
5621}
5622
5623void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5624 const OMPTaskDataTy &Data) {
5625 if (!CGF.HaveInsertPoint())
5626 return;
5627
5628 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5629 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5630 OMPBuilder.createTaskwait(CGF.Builder);
5631 } else {
5632 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5633 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5634 auto &M = CGM.getModule();
5635 Address DependenciesArray = Address::invalid();
5636 llvm::Value *NumOfElements;
5637 std::tie(NumOfElements, DependenciesArray) =
5638 emitDependClause(CGF, Data.Dependences, Loc);
5639 if (!Data.Dependences.empty()) {
5640 llvm::Value *DepWaitTaskArgs[7];
5641 DepWaitTaskArgs[0] = UpLoc;
5642 DepWaitTaskArgs[1] = ThreadID;
5643 DepWaitTaskArgs[2] = NumOfElements;
5644 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5645 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5646 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5647 DepWaitTaskArgs[6] =
5648 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5649
5650 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5651
5652 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5653 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5654 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5655 // kmp_int32 has_no_wait); if dependence info is specified.
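      // Editor's illustration (not from the original source): for
      //   #pragma omp taskwait depend(in : x)
      // this emits, roughly:
      //   __kmpc_omp_taskwait_deps_51(&loc, gtid, /*ndeps=*/1, dep_list,
      //                               /*ndeps_noalias=*/0, NULL,
      //                               /*has_no_wait=*/0);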
5656 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5657 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5658 DepWaitTaskArgs);
5659
5660 } else {
5661
5662 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5663 // global_tid);
5664 llvm::Value *Args[] = {UpLoc, ThreadID};
5665 // Ignore return result until untied tasks are supported.
5666 CGF.EmitRuntimeCall(
5667 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5668 Args);
5669 }
5670 }
5671
5672 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5673 Region->emitUntiedSwitch(CGF);
5674}
5675
5676void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5677 OpenMPDirectiveKind InnerKind,
5678 const RegionCodeGenTy &CodeGen,
5679 bool HasCancel) {
5680 if (!CGF.HaveInsertPoint())
5681 return;
5682 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5683 InnerKind != OMPD_critical &&
5684 InnerKind != OMPD_master &&
5685 InnerKind != OMPD_masked);
5686 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5687}
5688
5689namespace {
5690enum RTCancelKind {
5691 CancelNoreq = 0,
5692 CancelParallel = 1,
5693 CancelLoop = 2,
5694 CancelSections = 3,
5695 CancelTaskgroup = 4
5696};
5697} // anonymous namespace
5698
5699static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5700 RTCancelKind CancelKind = CancelNoreq;
5701 if (CancelRegion == OMPD_parallel)
5702 CancelKind = CancelParallel;
5703 else if (CancelRegion == OMPD_for)
5704 CancelKind = CancelLoop;
5705 else if (CancelRegion == OMPD_sections)
5706 CancelKind = CancelSections;
5707 else {
5708 assert(CancelRegion == OMPD_taskgroup);
5709 CancelKind = CancelTaskgroup;
5710 }
5711 return CancelKind;
5712}
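// Editor's illustration (not from the original source): '#pragma omp cancel
// for' maps to CancelLoop, so the cancel entry points below emit, roughly:
//   %res = __kmpc_cancel(&loc, gtid, /*cncl_kind=*/2);
// and branch to the cancellation exit block when %res is non-zero.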
5713
5714void CGOpenMPRuntime::emitCancellationPointCall(
5715 CodeGenFunction &CGF, SourceLocation Loc,
5716 OpenMPDirectiveKind CancelRegion) {
5717 if (!CGF.HaveInsertPoint())
5718 return;
5719 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5720 // global_tid, kmp_int32 cncl_kind);
5721 if (auto *OMPRegionInfo =
5722 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5723 // For 'cancellation point taskgroup', the task region info may not have a
5724 // cancel. This may instead happen in another adjacent task.
5725 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5726 llvm::Value *Args[] = {
5727 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5728 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5729 // Ignore return result until untied tasks are supported.
5730 llvm::Value *Result = CGF.EmitRuntimeCall(
5731 OMPBuilder.getOrCreateRuntimeFunction(
5732 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5733 Args);
5734 // if (__kmpc_cancellationpoint()) {
5735 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5736 // exit from construct;
5737 // }
5738 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5739 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5740 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5741 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5742 CGF.EmitBlock(ExitBB);
5743 if (CancelRegion == OMPD_parallel)
5744 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5745 // exit from construct;
5746 CodeGenFunction::JumpDest CancelDest =
5747 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5748 CGF.EmitBranchThroughCleanup(CancelDest);
5749 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5750 }
5751 }
5752}
5753
5754void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5755 const Expr *IfCond,
5756 OpenMPDirectiveKind CancelRegion) {
5757 if (!CGF.HaveInsertPoint())
5758 return;
5759 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5760 // kmp_int32 cncl_kind);
5761 auto &M = CGM.getModule();
5762 if (auto *OMPRegionInfo =
5763 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5764 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5765 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5766 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5767 llvm::Value *Args[] = {
5768 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5769 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5770 // Ignore return result until untied tasks are supported.
5771 llvm::Value *Result = CGF.EmitRuntimeCall(
5772 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5773 // if (__kmpc_cancel()) {
5774 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5775 // exit from construct;
5776 // }
5777 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5778 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5779 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5780 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5781 CGF.EmitBlock(ExitBB);
5782 if (CancelRegion == OMPD_parallel)
5783 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5784 // exit from construct;
5785 CodeGenFunction::JumpDest CancelDest =
5786 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5787 CGF.EmitBranchThroughCleanup(CancelDest);
5788 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5789 };
5790 if (IfCond) {
5791 emitIfClause(CGF, IfCond, ThenGen,
5792 [](CodeGenFunction &, PrePostActionTy &) {});
5793 } else {
5794 RegionCodeGenTy ThenRCG(ThenGen);
5795 ThenRCG(CGF);
5796 }
5797 }
5798}
5799
5800namespace {
5801/// Cleanup action for uses_allocators support.
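/// For example (editor's illustration, not from the original source), for
/// \code
/// #pragma omp target uses_allocators(my_alloc(my_traits))
/// \endcode
/// Enter() emits the __kmpc_init_allocator call before the target region and
/// Exit() emits the matching __kmpc_destroy_allocator call after it.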
5802class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5803 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5804
5805public:
5806 OMPUsesAllocatorsActionTy(
5807 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5808 : Allocators(Allocators) {}
5809 void Enter(CodeGenFunction &CGF) override {
5810 if (!CGF.HaveInsertPoint())
5811 return;
5812 for (const auto &AllocatorData : Allocators) {
5813 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
5814 CGF, AllocatorData.first, AllocatorData.second);
5815 }
5816 }
5817 void Exit(CodeGenFunction &CGF) override {
5818 if (!CGF.HaveInsertPoint())
5819 return;
5820 for (const auto &AllocatorData : Allocators) {
5821 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
5822 AllocatorData.first);
5823 }
5824 }
5825};
5826} // namespace
5827
5828void CGOpenMPRuntime::emitTargetOutlinedFunction(
5829 const OMPExecutableDirective &D, StringRef ParentName,
5830 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5831 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5832 assert(!ParentName.empty() && "Invalid target entry parent name!");
5833 HasEmittedTargetRegion = true;
5834 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
5835 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
5836 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
5837 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
5838 if (!D.AllocatorTraits)
5839 continue;
5840 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
5841 }
5842 }
5843 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
5844 CodeGen.setAction(UsesAllocatorAction);
5845 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5846 IsOffloadEntry, CodeGen);
5847}
5848
5849void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
5850 const Expr *Allocator,
5851 const Expr *AllocatorTraits) {
5852 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5853 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5854 // Use default memspace handle.
5855 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5856 llvm::Value *NumTraits = llvm::ConstantInt::get(
5857 CGF.IntTy, cast<ConstantArrayType>(
5858 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
5859 ->getSize()
5860 .getLimitedValue());
5861 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
5862 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5863 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
5864 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
5865 AllocatorTraitsLVal.getBaseInfo(),
5866 AllocatorTraitsLVal.getTBAAInfo());
5867 llvm::Value *Traits = Addr.emitRawPointer(CGF);
5868
5869 llvm::Value *AllocatorVal =
5870 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5871 CGM.getModule(), OMPRTL___kmpc_init_allocator),
5872 {ThreadId, MemSpaceHandle, NumTraits, Traits});
5873 // Store to allocator.
5874 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
5875 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
5876 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5877 AllocatorVal =
5878 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
5879 Allocator->getType(), Allocator->getExprLoc());
5880 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
5881}
5882
5883void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
5884 const Expr *Allocator) {
5885 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
5886 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
5887 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
5888 llvm::Value *AllocatorVal =
5889 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
5890 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
5891 CGF.getContext().VoidPtrTy,
5892 Allocator->getExprLoc());
5893 (void)CGF.EmitRuntimeCall(
5894 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
5895 OMPRTL___kmpc_destroy_allocator),
5896 {ThreadId, AllocatorVal});
5897}
5898
5899void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
5900 const OMPExecutableDirective &D, CodeGenFunction &CGF,
5901 int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
5902 int32_t &MaxTeamsVal) {
5903
5904 getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
5905 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
5906 /*UpperBoundOnly=*/true);
5907
5908 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5909 for (auto *A : C->getAttrs()) {
5910 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
5911 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
5912 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
5913 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
5914 &AttrMinBlocksVal, &AttrMaxBlocksVal);
5915 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
5916 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
5917 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
5918 &AttrMaxThreadsVal);
5919 else
5920 continue;
5921
5922 MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
5923 if (AttrMaxThreadsVal > 0)
5924 MaxThreadsVal = MaxThreadsVal > 0
5925 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
5926 : AttrMaxThreadsVal;
5927 MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
5928 if (AttrMaxBlocksVal > 0)
5929 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
5930 : AttrMaxBlocksVal;
5931 }
5932 }
5933}
5934
5935void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5936 const OMPExecutableDirective &D, StringRef ParentName,
5937 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5938 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5939
5940 llvm::TargetRegionEntryInfo EntryInfo =
5941 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
5942
5943 CodeGenFunction CGF(CGM, true);
5944 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5945 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
5946 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
5947
5948 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5949 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5950 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
5951 };
5952
5953 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
5954 IsOffloadEntry, OutlinedFn, OutlinedFnID);
5955
5956 if (!OutlinedFn)
5957 return;
5958
5959 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
5960
5961 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
5962 for (auto *A : C->getAttrs()) {
5963 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
5964 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
5965 }
5966 }
5967}
5968
5969/// Checks if the expression is constant or does not have non-trivial function
5970/// calls.
5971static bool isTrivial(ASTContext &Ctx, const Expr * E) {
5972 // We can skip constant expressions.
5973 // We can skip expressions with trivial calls or simple expressions.
5974 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
5975 !E->hasNonTrivialCall(Ctx)) &&
5976 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
5977}
5978
5979const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
5980 const Stmt *Body) {
5981 const Stmt *Child = Body->IgnoreContainers();
5982 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
5983 Child = nullptr;
5984 for (const Stmt *S : C->body()) {
5985 if (const auto *E = dyn_cast<Expr>(S)) {
5986 if (isTrivial(Ctx, E))
5987 continue;
5988 }
5989 // Some of the statements can be ignored.
5990 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
5991 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
5992 continue;
5993 // Analyze declarations.
5994 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
5995 if (llvm::all_of(DS->decls(), [](const Decl *D) {
5996 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
5997 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
5998 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
5999 isa<UsingDirectiveDecl>(D) ||
6000 isa<OMPDeclareReductionDecl>(D) ||
6001 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6002 return true;
6003 const auto *VD = dyn_cast<VarDecl>(D);
6004 if (!VD)
6005 return false;
6006 return VD->hasGlobalStorage() || !VD->isUsed();
6007 }))
6008 continue;
6009 }
6010 // Found multiple children - cannot get the one child only.
6011 if (Child)
6012 return nullptr;
6013 Child = S;
6014 }
6015 if (Child)
6016 Child = Child->IgnoreContainers();
6017 }
6018 return Child;
6019}
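// For example, given a captured body such as:
//   {
//     ;                        // NullStmt, ignored
//     int Unused;              // unused local declaration, ignored
//     #pragma omp parallel for
//     ...
//   }
// the nested 'parallel for' is returned as the single child; two
// non-ignorable statements would make this return nullptr instead.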
6020
6021const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6022 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6023 int32_t &MaxTeamsVal) {
6024
6025 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6026 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6027 "Expected target-based executable directive.");
6028 switch (DirectiveKind) {
6029 case OMPD_target: {
6030 const auto *CS = D.getInnermostCapturedStmt();
6031 const auto *Body =
6032 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6033 const Stmt *ChildStmt =
6034 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6035 if (const auto *NestedDir =
6036 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6037 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6038 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6039 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6040 ->getNumTeams()
6041 .front();
6042 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6043 if (auto Constant =
6044 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6045 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6046 return NumTeams;
6047 }
6048 MinTeamsVal = MaxTeamsVal = 0;
6049 return nullptr;
6050 }
6051 MinTeamsVal = MaxTeamsVal = 1;
6052 return nullptr;
6053 }
6054 // A value of -1 is used to check whether we need to emit no teams region.
6055 MinTeamsVal = MaxTeamsVal = -1;
6056 return nullptr;
6057 }
6058 case OMPD_target_teams_loop:
6059 case OMPD_target_teams:
6060 case OMPD_target_teams_distribute:
6061 case OMPD_target_teams_distribute_simd:
6062 case OMPD_target_teams_distribute_parallel_for:
6063 case OMPD_target_teams_distribute_parallel_for_simd: {
6064 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6065 const Expr *NumTeams =
6066 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6067 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6068 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6069 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6070 return NumTeams;
6071 }
6072 MinTeamsVal = MaxTeamsVal = 0;
6073 return nullptr;
6074 }
6075 case OMPD_target_parallel:
6076 case OMPD_target_parallel_for:
6077 case OMPD_target_parallel_for_simd:
6078 case OMPD_target_parallel_loop:
6079 case OMPD_target_simd:
6080 MinTeamsVal = MaxTeamsVal = 1;
6081 return nullptr;
6082 case OMPD_parallel:
6083 case OMPD_for:
6084 case OMPD_parallel_for:
6085 case OMPD_parallel_loop:
6086 case OMPD_parallel_master:
6087 case OMPD_parallel_sections:
6088 case OMPD_for_simd:
6089 case OMPD_parallel_for_simd:
6090 case OMPD_cancel:
6091 case OMPD_cancellation_point:
6092 case OMPD_ordered:
6093 case OMPD_threadprivate:
6094 case OMPD_allocate:
6095 case OMPD_task:
6096 case OMPD_simd:
6097 case OMPD_tile:
6098 case OMPD_unroll:
6099 case OMPD_sections:
6100 case OMPD_section:
6101 case OMPD_single:
6102 case OMPD_master:
6103 case OMPD_critical:
6104 case OMPD_taskyield:
6105 case OMPD_barrier:
6106 case OMPD_taskwait:
6107 case OMPD_taskgroup:
6108 case OMPD_atomic:
6109 case OMPD_flush:
6110 case OMPD_depobj:
6111 case OMPD_scan:
6112 case OMPD_teams:
6113 case OMPD_target_data:
6114 case OMPD_target_exit_data:
6115 case OMPD_target_enter_data:
6116 case OMPD_distribute:
6117 case OMPD_distribute_simd:
6118 case OMPD_distribute_parallel_for:
6119 case OMPD_distribute_parallel_for_simd:
6120 case OMPD_teams_distribute:
6121 case OMPD_teams_distribute_simd:
6122 case OMPD_teams_distribute_parallel_for:
6123 case OMPD_teams_distribute_parallel_for_simd:
6124 case OMPD_target_update:
6125 case OMPD_declare_simd:
6126 case OMPD_declare_variant:
6127 case OMPD_begin_declare_variant:
6128 case OMPD_end_declare_variant:
6129 case OMPD_declare_target:
6130 case OMPD_end_declare_target:
6131 case OMPD_declare_reduction:
6132 case OMPD_declare_mapper:
6133 case OMPD_taskloop:
6134 case OMPD_taskloop_simd:
6135 case OMPD_master_taskloop:
6136 case OMPD_master_taskloop_simd:
6137 case OMPD_parallel_master_taskloop:
6138 case OMPD_parallel_master_taskloop_simd:
6139 case OMPD_requires:
6140 case OMPD_metadirective:
6141 case OMPD_unknown:
6142 break;
6143 default:
6144 break;
6145 }
6146 llvm_unreachable("Unexpected directive kind.");
6147}
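// For example, '#pragma omp target' over '#pragma omp teams num_teams(8)'
// takes the OMPD_target path above and reports MinTeamsVal = MaxTeamsVal = 8,
// while '#pragma omp target parallel' reports a single team (1/1) and
// returns no expression.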
6148
6149llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6150 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6151 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6152 "Clauses associated with the teams directive expected to be emitted "
6153 "only for the host!");
6154 CGBuilderTy &Bld = CGF.Builder;
6155 int32_t MinNT = -1, MaxNT = -1;
6156 const Expr *NumTeams =
6157 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6158 if (NumTeams != nullptr) {
6159 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6160
6161 switch (DirectiveKind) {
6162 case OMPD_target: {
6163 const auto *CS = D.getInnermostCapturedStmt();
6164 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6165 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6166 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6167 /*IgnoreResultAssign*/ true);
6168 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6169 /*isSigned=*/true);
6170 }
6171 case OMPD_target_teams:
6172 case OMPD_target_teams_distribute:
6173 case OMPD_target_teams_distribute_simd:
6174 case OMPD_target_teams_distribute_parallel_for:
6175 case OMPD_target_teams_distribute_parallel_for_simd: {
6176 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6177 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6178 /*IgnoreResultAssign*/ true);
6179 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6180 /*isSigned=*/true);
6181 }
6182 default:
6183 break;
6184 }
6185 }
6186
6187 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6188 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6189}
6190
6191/// Check for a num threads constant value (stored in \p UpperBound), or
6192/// expression (stored in \p E). If the value is conditional (via an if-clause),
6193/// store the condition in \p CondVal. If \p E and \p CondVal, respectively, are
6194/// nullptr, no expression evaluation is performed.
6195static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6196 const Expr **E, int32_t &UpperBound,
6197 bool UpperBoundOnly, llvm::Value **CondVal) {
6198 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6199 CGF.getContext(), CS->getCapturedStmt());
6200 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6201 if (!Dir)
6202 return;
6203
6204 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6205 // Handle if clause. If if clause present, the number of threads is
6206 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
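// For example, for '#pragma omp parallel num_threads(N) if(C)': when C
// folds to false the region runs with a single thread; otherwise N is
// used, or 0 ("let the runtime decide") if no num_threads clause exists.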
6207 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6208 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6209 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6210 const OMPIfClause *IfClause = nullptr;
6211 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6212 if (C->getNameModifier() == OMPD_unknown ||
6213 C->getNameModifier() == OMPD_parallel) {
6214 IfClause = C;
6215 break;
6216 }
6217 }
6218 if (IfClause) {
6219 const Expr *CondExpr = IfClause->getCondition();
6220 bool Result;
6221 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6222 if (!Result) {
6223 UpperBound = 1;
6224 return;
6225 }
6226 } else {
6227 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6228 if (const auto *PreInit =
6229 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6230 for (const auto *I : PreInit->decls()) {
6231 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6232 CGF.EmitVarDecl(cast<VarDecl>(*I));
6233 } else {
6234 CodeGenFunction::AutoVarEmission Emission =
6235 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6236 CGF.EmitAutoVarCleanups(Emission);
6237 }
6238 }
6239 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6240 }
6241 }
6242 }
6243 }
6244 // Check the value of the num_threads clause iff the if clause was not
6245 // specified or does not evaluate to false.
6246 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6247 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6248 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6249 const auto *NumThreadsClause =
6250 Dir->getSingleClause<OMPNumThreadsClause>();
6251 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6252 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6253 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6254 UpperBound =
6255 UpperBound
6256 ? Constant->getZExtValue()
6257 : std::min(UpperBound,
6258 static_cast<int32_t>(Constant->getZExtValue()));
6259 // If we haven't found an upper bound, remember we saw a thread limiting
6260 // clause.
6261 if (UpperBound == -1)
6262 UpperBound = 0;
6263 if (!E)
6264 return;
6265 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6266 if (const auto *PreInit =
6267 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6268 for (const auto *I : PreInit->decls()) {
6269 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6270 CGF.EmitVarDecl(cast<VarDecl>(*I));
6271 } else {
6272 CodeGenFunction::AutoVarEmission Emission =
6273 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6274 CGF.EmitAutoVarCleanups(Emission);
6275 }
6276 }
6277 }
6278 *E = NTExpr;
6279 }
6280 return;
6281 }
6282 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6283 UpperBound = 1;
6284}
6285
6286const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6287 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6288 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6289 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6290 "Clauses associated with the teams directive expected to be emitted "
6291 "only for the host!");
6292 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6293 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6294 "Expected target-based executable directive.");
6295
6296 const Expr *NT = nullptr;
6297 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6298
6299 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6300 if (E->isIntegerConstantExpr(CGF.getContext())) {
6301 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6302 UpperBound = UpperBound ? Constant->getZExtValue()
6303 : std::min(UpperBound,
6304 int32_t(Constant->getZExtValue()));
6305 }
6306 // If we haven't found an upper bound, remember we saw a thread limiting
6307 // clause.
6308 if (UpperBound == -1)
6309 UpperBound = 0;
6310 if (EPtr)
6311 *EPtr = E;
6312 };
6313
6314 auto ReturnSequential = [&]() {
6315 UpperBound = 1;
6316 return NT;
6317 };
6318
6319 switch (DirectiveKind) {
6320 case OMPD_target: {
6321 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6322 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6323 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6324 CGF.getContext(), CS->getCapturedStmt());
6325 // TODO: The standard is not clear how to resolve two thread limit clauses,
6326 // let's pick the teams one if it's present, otherwise the target one.
6327 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6328 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6329 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6330 ThreadLimitClause = TLC;
6331 if (ThreadLimitExpr) {
6332 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6333 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6334 CodeGenFunction::LexicalScope Scope(
6335 CGF,
6336 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6337 if (const auto *PreInit =
6338 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6339 for (const auto *I : PreInit->decls()) {
6340 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6341 CGF.EmitVarDecl(cast<VarDecl>(*I));
6342 } else {
6343 CodeGenFunction::AutoVarEmission Emission =
6344 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6345 CGF.EmitAutoVarCleanups(Emission);
6346 }
6347 }
6348 }
6349 }
6350 }
6351 }
6352 if (ThreadLimitClause)
6353 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6354 ThreadLimitExpr);
6355 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6356 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6357 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6358 CS = Dir->getInnermostCapturedStmt();
6359 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6360 CGF.getContext(), CS->getCapturedStmt());
6361 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6362 }
6363 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6364 CS = Dir->getInnermostCapturedStmt();
6365 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6366 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6367 return ReturnSequential();
6368 }
6369 return NT;
6370 }
6371 case OMPD_target_teams: {
6372 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6373 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6374 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6375 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6376 ThreadLimitExpr);
6377 }
6378 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6379 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6380 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6381 CGF.getContext(), CS->getCapturedStmt());
6382 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6383 if (Dir->getDirectiveKind() == OMPD_distribute) {
6384 CS = Dir->getInnermostCapturedStmt();
6385 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6386 }
6387 }
6388 return NT;
6389 }
6390 case OMPD_target_teams_distribute:
6391 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6392 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6393 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6394 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6395 ThreadLimitExpr);
6396 }
6397 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6398 UpperBoundOnly, CondVal);
6399 return NT;
6400 case OMPD_target_teams_loop:
6401 case OMPD_target_parallel_loop:
6402 case OMPD_target_parallel:
6403 case OMPD_target_parallel_for:
6404 case OMPD_target_parallel_for_simd:
6405 case OMPD_target_teams_distribute_parallel_for:
6406 case OMPD_target_teams_distribute_parallel_for_simd: {
6407 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6408 const OMPIfClause *IfClause = nullptr;
6409 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6410 if (C->getNameModifier() == OMPD_unknown ||
6411 C->getNameModifier() == OMPD_parallel) {
6412 IfClause = C;
6413 break;
6414 }
6415 }
6416 if (IfClause) {
6417 const Expr *Cond = IfClause->getCondition();
6418 bool Result;
6419 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6420 if (!Result)
6421 return ReturnSequential();
6422 } else {
6423 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6424 *CondVal = CGF.EvaluateExprAsBool(Cond);
6425 }
6426 }
6427 }
6428 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6429 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6430 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6431 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6432 ThreadLimitExpr);
6433 }
6434 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6435 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6436 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6437 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6438 return NumThreadsClause->getNumThreads();
6439 }
6440 return NT;
6441 }
6442 case OMPD_target_teams_distribute_simd:
6443 case OMPD_target_simd:
6444 return ReturnSequential();
6445 default:
6446 break;
6447 }
6448 llvm_unreachable("Unsupported directive kind.");
6449}
6450
6451llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6452 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6453 llvm::Value *NumThreadsVal = nullptr;
6454 llvm::Value *CondVal = nullptr;
6455 llvm::Value *ThreadLimitVal = nullptr;
6456 const Expr *ThreadLimitExpr = nullptr;
6457 int32_t UpperBound = -1;
6458
6459 const Expr *NT = getNumThreadsExprForTargetDirective(
6460 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6461 &ThreadLimitExpr);
6462
6463 // Thread limit expressions are used below, emit them.
6464 if (ThreadLimitExpr) {
6465 ThreadLimitVal =
6466 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6467 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6468 /*isSigned=*/false);
6469 }
6470
6471 // Generate the num threads expression.
6472 if (UpperBound == 1) {
6473 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6474 } else if (NT) {
6475 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6476 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6477 /*isSigned=*/false);
6478 } else if (ThreadLimitVal) {
6479 // If we do not have a num threads value but a thread limit, replace the
6480 // former with the latter. We have already handled the thread limit expression.
6481 NumThreadsVal = ThreadLimitVal;
6482 ThreadLimitVal = nullptr;
6483 } else {
6484 // Default to "0" which means runtime choice.
6485 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6486 NumThreadsVal = CGF.Builder.getInt32(0);
6487 }
6488
6489 // Handle if clause. If if clause present, the number of threads is
6490 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6491 if (CondVal) {
6492 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6493 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6494 CGF.Builder.getInt32(1));
6495 }
6496
6497 // If the thread limit and num teams expression were present, take the
6498 // minimum.
6499 if (ThreadLimitVal) {
6500 NumThreadsVal = CGF.Builder.CreateSelect(
6501 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6502 ThreadLimitVal, NumThreadsVal);
6503 }
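// For example, with both 'thread_limit(64)' and 'num_threads(128)' present,
// the select above picks min(64, 128) = 64 as the effective thread count.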
6504
6505 return NumThreadsVal;
6506}
6507
6508namespace {
6510
6511// Utility to handle information from clauses associated with a given
6512// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6513// It provides a convenient interface to obtain the information and generate
6514// code for that information.
6515class MappableExprsHandler {
6516public:
6517 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6518 static unsigned getFlagMemberOffset() {
6519 unsigned Offset = 0;
6520 for (uint64_t Remain =
6521 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6522 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6523 !(Remain & 1); Remain = Remain >> 1)
6524 Offset++;
6525 return Offset;
6526 }
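// For example, OMP_MAP_MEMBER_OF occupies the high 16 bits of the 64-bit
// flag word (0xFFFF000000000000), so the loop above counts 48 trailing
// zero bits and the offset returned is 48.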
6527
6528 /// Class that holds debugging information for a data mapping to be passed to
6529 /// the runtime library.
6530 class MappingExprInfo {
6531 /// The variable declaration used for the data mapping.
6532 const ValueDecl *MapDecl = nullptr;
6533 /// The original expression used in the map clause, or null if there is
6534 /// none.
6535 const Expr *MapExpr = nullptr;
6536
6537 public:
6538 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6539 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6540
6541 const ValueDecl *getMapDecl() const { return MapDecl; }
6542 const Expr *getMapExpr() const { return MapExpr; }
6543 };
6544
6545 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6546 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6547 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6548 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6549 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6550 using MapNonContiguousArrayTy =
6551 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6552 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6553 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6554
6555 /// This structure contains combined information generated for mappable
6556 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6557 /// mappers, and non-contiguous information.
6558 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6559 MapExprsArrayTy Exprs;
6560 MapValueDeclsArrayTy Mappers;
6561 MapValueDeclsArrayTy DevicePtrDecls;
6562
6563 /// Append arrays in \a CurInfo.
6564 void append(MapCombinedInfoTy &CurInfo) {
6565 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6566 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6567 CurInfo.DevicePtrDecls.end());
6568 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6569 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6570 }
6571 };
6572
6573 /// Map between a struct and its lowest & highest elements which have been
6574 /// mapped.
6575 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6576 /// HE(FieldIndex, Pointer)}
6577 struct StructRangeInfoTy {
6578 MapCombinedInfoTy PreliminaryMapData;
6579 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6580 0, Address::invalid()};
6581 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6582 0, Address::invalid()};
6583 Address Base = Address::invalid();
6584 Address LB = Address::invalid();
6585 bool IsArraySection = false;
6586 bool HasCompleteRecord = false;
6587 };
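// For illustration, mapping only 's.i' (field index 0) and 's.p' (field
// index 3) of some struct records LowestElem = {0, &s.i} and
// HighestElem = {3, &s.p}; the contiguous range between the two is what
// gets allocated for the partial struct.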
6588
6589private:
6590 /// Kind that defines how a device pointer has to be returned.
6591 struct MapInfo {
6592 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6593 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6594 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6595 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6596 bool ReturnDevicePointer = false;
6597 bool IsImplicit = false;
6598 const ValueDecl *Mapper = nullptr;
6599 const Expr *VarRef = nullptr;
6600 bool ForDeviceAddr = false;
6601
6602 MapInfo() = default;
6603 MapInfo(
6604 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6605 OpenMPMapClauseKind MapType,
6606 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6607 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6608 bool ReturnDevicePointer, bool IsImplicit,
6609 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6610 bool ForDeviceAddr = false)
6611 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6612 MotionModifiers(MotionModifiers),
6613 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6614 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6615 };
6616
6617 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6618 /// member and there is no map information about it, then emission of that
6619 /// entry is deferred until the whole struct has been processed.
6620 struct DeferredDevicePtrEntryTy {
6621 const Expr *IE = nullptr;
6622 const ValueDecl *VD = nullptr;
6623 bool ForDeviceAddr = false;
6624
6625 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6626 bool ForDeviceAddr)
6627 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6628 };
6629
6630 /// The target directive from where the mappable clauses were extracted. It
6631 /// is either an executable directive or a user-defined mapper directive.
6632 llvm::PointerUnion<const OMPExecutableDirective *,
6633 const OMPDeclareMapperDecl *>
6634 CurDir;
6635
6636 /// Function the directive is being generated for.
6637 CodeGenFunction &CGF;
6638
6639 /// Set of all first private variables in the current directive.
6640 /// bool data is set to true if the variable is implicitly marked as
6641 /// firstprivate, false otherwise.
6642 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6643
6644 /// Map between device pointer declarations and their expression components.
6645 /// The key value for declarations in 'this' is null.
6646 llvm::DenseMap<
6647 const ValueDecl *,
6648 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef>>
6649 DevPointersMap;
6650
6651 /// Map between device addr declarations and their expression components.
6652 /// The key value for declarations in 'this' is null.
6653 llvm::DenseMap<
6654 const ValueDecl *,
6655 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef>>
6656 HasDevAddrsMap;
6657
6658 /// Map between lambda declarations and their map type.
6659 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6660
6661 llvm::Value *getExprTypeSize(const Expr *E) const {
6662 QualType ExprTy = E->getType().getCanonicalType();
6663
6664 // Calculate the size for array shaping expression.
6665 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6666 llvm::Value *Size =
6667 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6668 for (const Expr *SE : OAE->getDimensions()) {
6669 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6670 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6671 CGF.getContext().getSizeType(),
6672 SE->getExprLoc());
6673 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6674 }
6675 return Size;
6676 }
6677
6678 // Reference types are ignored for mapping purposes.
6679 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6680 ExprTy = RefTy->getPointeeType().getCanonicalType();
6681
6682 // Given that an array section is considered a built-in type, we need to
6683 // do the calculation based on the length of the section instead of relying
6684 // on CGF.getTypeSize(E->getType()).
6685 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
6686 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
6687 OAE->getBase()->IgnoreParenImpCasts())
6688 .getCanonicalType();
6689
6690 // If there is no length associated with the expression and the lower
6691 // bound is not specified either, that means we are using the whole
6692 // length of the base.
6693 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6694 !OAE->getLowerBound())
6695 return CGF.getTypeSize(BaseTy);
6696
6697 llvm::Value *ElemSize;
6698 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6699 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6700 } else {
6701 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6702 assert(ATy && "Expecting array type if not a pointer type.");
6703 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6704 }
6705
6706 // If we don't have a length at this point, that is because we have an
6707 // array section with a single element.
6708 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6709 return ElemSize;
6710
6711 if (const Expr *LenExpr = OAE->getLength()) {
6712 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
6713 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
6714 CGF.getContext().getSizeType(),
6715 LenExpr->getExprLoc());
6716 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6717 }
6718 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6719 OAE->getLowerBound() && "expected array_section[lb:].");
6720 // Size = sizetype - lb * elemtype;
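// For example, for 'int a[10]' mapped as 'a[2:]' this emits
// 10*sizeof(int) - 2*sizeof(int) = 32 bytes, with the select below
// clamping the result to 0 if the lower bound lies past the end.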
6721 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
6722 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
6723 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
6724 CGF.getContext().getSizeType(),
6725 OAE->getLowerBound()->getExprLoc());
6726 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
6727 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
6728 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
6729 LengthVal = CGF.Builder.CreateSelect(
6730 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
6731 return LengthVal;
6732 }
6733 return CGF.getTypeSize(ExprTy);
6734 }
6735
6736 /// Return the corresponding bits for a given map clause modifier. Add
6737 /// a flag marking the map as a pointer if requested. Add a flag marking the
6738 /// map as the first one of a series of maps that relate to the same map
6739 /// expression.
6740 OpenMPOffloadMappingFlags getMapTypeBits(
6741 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6742 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6743 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6744 OpenMPOffloadMappingFlags Bits =
6745 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
6746 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
6747 switch (MapType) {
6748 case OMPC_MAP_alloc:
6749 case OMPC_MAP_release:
6750 // alloc and release is the default behavior in the runtime library, i.e.
6751 // if we don't pass any bits alloc/release that is what the runtime is
6752 // going to do. Therefore, we don't need to signal anything for these two
6753 // type modifiers.
6754 break;
6755 case OMPC_MAP_to:
6756 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
6757 break;
6758 case OMPC_MAP_from:
6759 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6760 break;
6761 case OMPC_MAP_tofrom:
6762 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
6763 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
6764 break;
6765 case OMPC_MAP_delete:
6766 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
6767 break;
6768 case OMPC_MAP_unknown:
6769 llvm_unreachable("Unexpected map type!");
6770 }
6771 if (AddPtrFlag)
6772 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
6773 if (AddIsTargetParamFlag)
6774 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
6775 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
6776 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
6777 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
6778 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
6779 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
6780 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
6781 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
6782 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
6783 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
6784 if (IsNonContiguous)
6785 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
6786 return Bits;
6787 }
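// For example, 'map(always, close, tofrom: x)' produces
// OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE, with
// OMP_MAP_TARGET_PARAM added when the map introduces a kernel argument.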
6788
6789 /// Return true if the provided expression is a final array section. A
6790 /// final array section is one whose length can't be proved to be one.
6791 bool isFinalArraySectionExpression(const Expr *E) const {
6792 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
6793
6794 // It is not an array section and therefore not a unity-size one.
6795 if (!OASE)
6796 return false;
6797
6798 // An array section with no colon always refers to a single element.
6799 if (OASE->getColonLocFirst().isInvalid())
6800 return false;
6801
6802 const Expr *Length = OASE->getLength();
6803
6804 // If we don't have a length we have to check if the array has size 1
6805 // for this dimension. Also, we should always expect a length if the
6806 // base type is pointer.
6807 if (!Length) {
6808 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
6809 OASE->getBase()->IgnoreParenImpCasts())
6810 .getCanonicalType();
6811 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6812 return ATy->getSExtSize() != 1;
6813 // If we don't have a constant dimension length, we have to consider
6814 // the current section as having any size, so it is not necessarily
6815 // unitary. If it happens to be unity size, that's the user's fault.
6816 return true;
6817 }
6818
6819 // Check if the length evaluates to 1.
6820 Expr::EvalResult Result;
6821 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
6822 return true; // Can have more than size 1.
6823
6824 llvm::APSInt ConstLength = Result.Val.getInt();
6825 return ConstLength.getSExtValue() != 1;
6826 }
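// For example, 'a[3]' (no section) and 'a[3:1]' (provably length one) are
// not final array sections, while 'a[0:n]' with a runtime 'n' is.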
6827
6828 /// Generate the base pointers, section pointers, sizes, map type bits, and
6829 /// user-defined mappers (all included in \a CombinedInfo) for the provided
6830 /// map type, map or motion modifiers, and expression components.
6831 /// \a IsFirstComponent should be set to true if the provided set of
6832 /// components is the first associated with a capture.
6833 void generateInfoForComponentList(
6834 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6835 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6836 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6837 MapCombinedInfoTy &CombinedInfo,
6838 MapCombinedInfoTy &StructBaseCombinedInfo,
6839 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
6840 bool IsImplicit, bool GenerateAllInfoForClauses,
6841 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
6842 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
6843 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
6844 OverlappedElements = std::nullopt,
6845 bool AreBothBasePtrAndPteeMapped = false) const {
6846 // The following summarizes what has to be generated for each map and the
6847 // types below. The generated information is expressed in this order:
6848 // base pointer, section pointer, size, flags
6849 // (to add to the ones that come from the map type and modifier).
6850 //
6851 // double d;
6852 // int i[100];
6853 // float *p;
6854 // int **a = &i;
6855 //
6856 // struct S1 {
6857 // int i;
6858 // float f[50];
6859 // }
6860 // struct S2 {
6861 // int i;
6862 // float f[50];
6863 // S1 s;
6864 // double *p;
6865 // struct S2 *ps;
6866 // int &ref;
6867 // }
6868 // S2 s;
6869 // S2 *ps;
6870 //
6871 // map(d)
6872 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
6873 //
6874 // map(i)
6875 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
6876 //
6877 // map(i[1:23])
6878 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
6879 //
6880 // map(p)
6881 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
6882 //
6883 // map(p[1:24])
6884 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
6885 // in unified shared memory mode or for local pointers
6886 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
6887 //
6888 // map((*a)[0:3])
6889 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6890 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
6891 //
6892 // map(**a)
6893 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
6894 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
6895 //
6896 // map(s)
6897 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
6898 //
6899 // map(s.i)
6900 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
6901 //
6902 // map(s.s.f)
6903 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6904 //
6905 // map(s.p)
6906 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
6907 //
6908 // map(to: s.p[:22])
6909 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
6910 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
6911 // &(s.p), &(s.p[0]), 22*sizeof(double),
6912 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6913 // (*) alloc space for struct members, only this is a target parameter
6914 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6915 // optimizes this entry out, same in the examples below)
6916 // (***) map the pointee (map: to)
6917 //
6918 // map(to: s.ref)
6919 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
6920 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
6921 // (*) alloc space for struct members, only this is a target parameter
6922 // (**) map the pointer (nothing to be mapped in this example) (the compiler
6923 // optimizes this entry out, same in the examples below)
6924 // (***) map the pointee (map: to)
6925 //
6926 // map(s.ps)
6927 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6928 //
6929 // map(from: s.ps->s.i)
6930 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6931 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6932 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6933 //
6934 // map(to: s.ps->ps)
6935 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6936 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6937 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
6938 //
6939 // map(s.ps->ps->ps)
6940 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6941 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6942 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6943 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6944 //
6945 // map(to: s.ps->ps->s.f[:22])
6946 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
6947 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
6948 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6949 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6950 //
6951 // map(ps)
6952 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
6953 //
6954 // map(ps->i)
6955 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
6956 //
6957 // map(ps->s.f)
6958 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
6959 //
6960 // map(from: ps->p)
6961 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
6962 //
6963 // map(to: ps->p[:22])
6964 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
6965 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
6966 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
6967 //
6968 // map(ps->ps)
6969 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
6970 //
6971 // map(from: ps->ps->s.i)
6972 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6973 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6974 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6975 //
6976 // map(from: ps->ps->ps)
6977 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6978 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6979 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6980 //
6981 // map(ps->ps->ps->ps)
6982 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6983 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6984 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6985 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
6986 //
6987 // map(to: ps->ps->ps->s.f[:22])
6988 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
6989 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
6990 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
6991 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
6992 //
6993 // map(to: s.f[:22]) map(from: s.p[:33])
6994 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
6995 // sizeof(double*) (**), TARGET_PARAM
6996 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
6997 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
6998 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
6999 // (*) allocate contiguous space needed to fit all mapped members even if
7000 // we allocate space for members not mapped (in this example,
7001 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7002 // them as well because they fall between &s.f[0] and &s.p)
7003 //
7004 // map(from: s.f[:22]) map(to: ps->p[:33])
7005 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7006 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7007 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7008 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7009 // (*) the struct this entry pertains to is the 2nd element in the list of
7010 // arguments, hence MEMBER_OF(2)
7011 //
7012 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7013 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7014 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7015 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7016 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7017 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7018 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7019 // (*) the struct this entry pertains to is the 4th element in the list
7020 // of arguments, hence MEMBER_OF(4)
7021 //
7022 // map(p, p[:100])
7023 // ===> map(p[:100])
7024 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7025
7026 // Track if the map information being generated is the first for a capture.
7027 bool IsCaptureFirstInfo = IsFirstComponentList;
7028 // When the variable is on a declare target link or in a to clause with
7029 // unified memory, a reference is needed to hold the host/device address
7030 // of the variable.
7031 bool RequiresReference = false;
7032
7033 // Scan the components from the base to the complete expression.
7034 auto CI = Components.rbegin();
7035 auto CE = Components.rend();
7036 auto I = CI;
7037
7038 // Track if the map information being generated is the first for a list of
7039 // components.
7040 bool IsExpressionFirstInfo = true;
7041 bool FirstPointerInComplexData = false;
7042 Address BP = Address::invalid();
7043 const Expr *AssocExpr = I->getAssociatedExpression();
7044 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7045 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7046 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7047
7048 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7049 return;
7050 if (isa<MemberExpr>(AssocExpr)) {
7051 // The base is the 'this' pointer. The content of the pointer is going
7052 // to be the base of the field being mapped.
7053 BP = CGF.LoadCXXThisAddress();
7054 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7055 (OASE &&
7056 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7057 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7058 } else if (OAShE &&
7059 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7060 BP = Address(
7061 CGF.EmitScalarExpr(OAShE->getBase()),
7062 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7063 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7064 } else {
7065 // The base is the reference to the variable.
7066 // BP = &Var.
7067 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7068 if (const auto *VD =
7069 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7070 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7071 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7072 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7073 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7074 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7075 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7076 RequiresReference = true;
7077 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7078 }
7079 }
7080 }
7081
7082 // If the variable is a pointer and is being dereferenced (i.e. is not
7083 // the last component), the base has to be the pointer itself, not its
7084 // reference. References are ignored for mapping purposes.
7085 QualType Ty =
7086 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7087 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7088 // No need to generate individual map information for the pointer, it
7089 // can be associated with the combined storage if shared memory mode is
7090 // active or the base declaration is not global variable.
7091 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7092 if (!AreBothBasePtrAndPteeMapped &&
7093 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7094 !VD || VD->hasLocalStorage()))
7095 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7096 else
7097 FirstPointerInComplexData = true;
7098 ++I;
7099 }
7100 }
7101
7102 // Track whether a component of the list should be marked as MEMBER_OF some
7103 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7104 // in a component list should be marked as MEMBER_OF, all subsequent entries
7105 // do not belong to the base struct. E.g.
7106 // struct S2 s;
7107 // s.ps->ps->ps->f[:]
7108 // (1) (2) (3) (4)
7109 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7110 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7111 // is the pointee of ps(2), which is not a member of struct s, so it should not
7112 // be marked as such (it is still PTR_AND_OBJ).
7113 // The variable is initialized to false so that PTR_AND_OBJ entries which
7114 // are not struct members are not considered (e.g. array of pointers to
7115 // data).
7116 bool ShouldBeMemberOf = false;
7117
7118 // Variable keeping track of whether or not we have encountered a component
7119 // in the component list which is a member expression. Useful when we have a
7120 // pointer or a final array section, in which case it is the previous
7121 // component in the list which tells us whether we have a member expression.
7122 // E.g. X.f[:]
7123 // While processing the final array section "[:]" it is "f" which tells us
7124 // whether we are dealing with a member of a declared struct.
7125 const MemberExpr *EncounteredME = nullptr;
7126
7127 // Track the total number of dimensions. Start from one for the dummy
7128 // dimension.
7129 uint64_t DimSize = 1;
7130
7131 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7132 bool IsPrevMemberReference = false;
7133
7134 bool IsPartialMapped =
7135 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7136
7137 // We need to check if we will be encountering any MEs. If we do not
7138 // encounter any ME expression it means we will be mapping the whole struct.
7139 // In that case we need to skip adding an entry for the struct to the
7140 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7141 // list only when generating all info for clauses.
7142 bool IsMappingWholeStruct = true;
7143 if (!GenerateAllInfoForClauses) {
7144 IsMappingWholeStruct = false;
7145 } else {
7146 for (auto TempI = I; TempI != CE; ++TempI) {
7147 const MemberExpr *PossibleME =
7148 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7149 if (PossibleME) {
7150 IsMappingWholeStruct = false;
7151 break;
7152 }
7153 }
7154 }
7155
7156 for (; I != CE; ++I) {
7157 // If the current component is member of a struct (parent struct) mark it.
7158 if (!EncounteredME) {
7159 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7160 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7161 // as MEMBER_OF the parent struct.
7162 if (EncounteredME) {
7163 ShouldBeMemberOf = true;
7164 // Do not emit as complex pointer if this is actually not array-like
7165 // expression.
7166 if (FirstPointerInComplexData) {
7167 QualType Ty = std::prev(I)
7168 ->getAssociatedDeclaration()
7169 ->getType()
7170 .getNonReferenceType();
7171 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7172 FirstPointerInComplexData = false;
7173 }
7174 }
7175 }
7176
7177 auto Next = std::next(I);
7178
7179 // We need to generate the addresses and sizes if this is the last
7180 // component, if the component is a pointer or if it is an array section
7181 // whose length can't be proved to be one. If this is a pointer, it
7182 // becomes the base address for the following components.
7183
7184 // A final array section is one whose length can't be proved to be one.
7185 // If the map item is non-contiguous then we don't treat any array section
7186 // as final array section.
7187 bool IsFinalArraySection =
7188 !IsNonContiguous &&
7189 isFinalArraySectionExpression(I->getAssociatedExpression());
7190
7191 // If we have a declaration for the mapping use that, otherwise use
7192 // the base declaration of the map clause.
7193 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7194 ? I->getAssociatedDeclaration()
7195 : BaseDecl;
7196 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7197 : MapExpr;
7198
7199 // Get information on whether the element is a pointer. Have to do a
7200 // special treatment for array sections given that they are built-in
7201 // types.
7202 const auto *OASE =
7203 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7204 const auto *OAShE =
7205 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7206 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7207 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7208 bool IsPointer =
7209 OAShE ||
7210 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7211 .getCanonicalType()
7212 ->isAnyPointerType()) ||
7213 I->getAssociatedExpression()->getType()->isAnyPointerType();
7214 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7215 MapDecl &&
7216 MapDecl->getType()->isLValueReferenceType();
7217 bool IsNonDerefPointer = IsPointer &&
7218 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7219 !IsNonContiguous;
7220
7221 if (OASE)
7222 ++DimSize;
7223
7224 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7225 IsFinalArraySection) {
7226 // If this is not the last component, we expect the pointer to be
7227 // associated with an array expression or member expression.
7228 assert((Next == CE ||
7229 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7230 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7231 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7232 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7233 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7234 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7235 "Unexpected expression");
7236
7237 Address LB = Address::invalid();
7238 Address LowestElem = Address::invalid();
7239 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7240 const MemberExpr *E) {
7241 const Expr *BaseExpr = E->getBase();
7242 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7243 // scalar.
7244 LValue BaseLV;
7245 if (E->isArrow()) {
7246 LValueBaseInfo BaseInfo;
7247 TBAAAccessInfo TBAAInfo;
7248 Address Addr =
7249 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7250 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7251 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7252 } else {
7253 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7254 }
7255 return BaseLV;
7256 };
7257 if (OAShE) {
7258 LowestElem = LB =
7259 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7260 CGF.ConvertTypeForMem(
7261 OAShE->getBase()->getType()->getPointeeType()),
7262 CGF.getContext().getTypeAlignInChars(
7263 OAShE->getBase()->getType()));
7264 } else if (IsMemberReference) {
7265 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7266 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7267 LowestElem = CGF.EmitLValueForFieldInitialization(
7268 BaseLVal, cast<FieldDecl>(MapDecl))
7269 .getAddress();
7270 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7271 .getAddress();
7272 } else {
7273 LowestElem = LB =
7274 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7275 .getAddress();
7276 }
7277
7278 // If this component is a pointer inside the base struct then we don't
7279 // need to create any entry for it - it will be combined with the object
7280 // it is pointing to into a single PTR_AND_OBJ entry.
7281 bool IsMemberPointerOrAddr =
7282 EncounteredME &&
7283 (((IsPointer || ForDeviceAddr) &&
7284 I->getAssociatedExpression() == EncounteredME) ||
7285 (IsPrevMemberReference && !IsPointer) ||
7286 (IsMemberReference && Next != CE &&
7287 !Next->getAssociatedExpression()->getType()->isPointerType()));
7288 if (!OverlappedElements.empty() && Next == CE) {
7289 // Handle base element with the info for overlapped elements.
7290 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7291 assert(!IsPointer &&
7292 "Unexpected base element with the pointer type.");
7293 // Mark the whole struct as the struct that requires allocation on the
7294 // device.
7295 PartialStruct.LowestElem = {0, LowestElem};
7296 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7297 I->getAssociatedExpression()->getType());
7298 Address HB = CGF.Builder.CreateConstGEP(
7299 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7300 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7301 TypeSize.getQuantity() - 1);
7302 PartialStruct.HighestElem = {
7303 std::numeric_limits<decltype(
7304 PartialStruct.HighestElem.first)>::max(),
7305 HB};
7306 PartialStruct.Base = BP;
7307 PartialStruct.LB = LB;
7308 assert(
7309 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7310 "Overlapped elements must be used only once for the variable.");
7311 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7312 // Emit data for non-overlapped data.
7313 OpenMPOffloadMappingFlags Flags =
7314 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7315 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7316 /*AddPtrFlag=*/false,
7317 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7318 llvm::Value *Size = nullptr;
7319 // Do bitcopy of all non-overlapped structure elements.
7320 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7321 Component : OverlappedElements) {
7322 Address ComponentLB = Address::invalid();
7323 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7324 Component) {
7325 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7326 const auto *FD = dyn_cast<FieldDecl>(VD);
7327 if (FD && FD->getType()->isLValueReferenceType()) {
7328 const auto *ME =
7329 cast<MemberExpr>(MC.getAssociatedExpression());
7330 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7331 ComponentLB =
7332 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7333 .getAddress();
7334 } else {
7335 ComponentLB =
7336 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7337 .getAddress();
7338 }
7339 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7340 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7341 Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7342 LBPtr);
7343 break;
7344 }
7345 }
7346 assert(Size && "Failed to determine structure size");
7347 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7348 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7349 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7350 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7351 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7352 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7353 Size, CGF.Int64Ty, /*isSigned=*/true));
7354 CombinedInfo.Types.push_back(Flags);
7355 CombinedInfo.Mappers.push_back(nullptr);
7356 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7357 : 1);
7358 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7359 }
7360 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7361 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7362 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7363 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7364 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7365 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7366 Size = CGF.Builder.CreatePtrDiff(
7367 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7368 LBPtr);
7369 CombinedInfo.Sizes.push_back(
7370 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7371 CombinedInfo.Types.push_back(Flags);
7372 CombinedInfo.Mappers.push_back(nullptr);
7373 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7374 : 1);
7375 break;
7376 }
7377 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7378 // Skip adding an entry in the CurInfo of this combined entry if the
7379 // whole struct is currently being mapped. The struct needs to be added
7380 // in the first position before any data internal to the struct is being
7381 // mapped.
7382 // Skip adding an entry in the CurInfo of this combined entry if the
7383 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
7384 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
7385 (Next == CE && MapType != OMPC_MAP_unknown)) {
7386 if (!IsMappingWholeStruct) {
7387 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7388 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7389 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7390 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7391 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7392 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7393 Size, CGF.Int64Ty, /*isSigned=*/true));
7394 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7395 : 1);
7396 } else {
7397 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7398 StructBaseCombinedInfo.BasePointers.push_back(
7399 BP.emitRawPointer(CGF));
7400 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7401 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7402 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7403 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7404 Size, CGF.Int64Ty, /*isSigned=*/true));
7405 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7406 IsNonContiguous ? DimSize : 1);
7407 }
7408
7409 // If Mapper is valid, the last component inherits the mapper.
7410 bool HasMapper = Mapper && Next == CE;
7411 if (!IsMappingWholeStruct)
7412 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7413 else
7414 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7415 : nullptr);
7416
7417 // We need to add a pointer flag for each map that comes from the
7418 // same expression except for the first one. We also need to signal
7419 // this map is the first one that relates with the current capture
7420 // (there is a set of entries for each capture).
7421 OpenMPOffloadMappingFlags Flags =
7422 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7423 !IsExpressionFirstInfo || RequiresReference ||
7424 FirstPointerInComplexData || IsMemberReference,
7425 AreBothBasePtrAndPteeMapped ||
7426 (IsCaptureFirstInfo && !RequiresReference),
7427 IsNonContiguous);
7428
7429 if (!IsExpressionFirstInfo || IsMemberReference) {
7430 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7431 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7432 if (IsPointer || (IsMemberReference && Next != CE))
7433 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7434 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7435 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7436 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7437 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7438
7439 if (ShouldBeMemberOf) {
7440 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7441 // should be later updated with the correct value of MEMBER_OF.
7442 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7443 // From now on, all subsequent PTR_AND_OBJ entries should not be
7444 // marked as MEMBER_OF.
7445 ShouldBeMemberOf = false;
7446 }
7447 }
7448
7449 if (!IsMappingWholeStruct)
7450 CombinedInfo.Types.push_back(Flags);
7451 else
7452 StructBaseCombinedInfo.Types.push_back(Flags);
7453 }
7454
7455 // If we have encountered a member expression so far, keep track of the
7456 // mapped member. If the parent is "*this", then the value declaration
7457 // is nullptr.
7458 if (EncounteredME) {
7459 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7460 unsigned FieldIndex = FD->getFieldIndex();
7461
7462 // Update info about the lowest and highest elements for this struct
7463 if (!PartialStruct.Base.isValid()) {
7464 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7465 if (IsFinalArraySection) {
7466 Address HB =
7467 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7468 .getAddress();
7469 PartialStruct.HighestElem = {FieldIndex, HB};
7470 } else {
7471 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7472 }
7473 PartialStruct.Base = BP;
7474 PartialStruct.LB = BP;
7475 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7476 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7477 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7478 if (IsFinalArraySection) {
7479 Address HB =
7480 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7481 .getAddress();
7482 PartialStruct.HighestElem = {FieldIndex, HB};
7483 } else {
7484 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7485 }
7486 }
7487 }
7488
7489 // Need to emit combined struct for array sections.
7490 if (IsFinalArraySection || IsNonContiguous)
7491 PartialStruct.IsArraySection = true;
7492
7493 // If we have a final array section, we are done with this expression.
7494 if (IsFinalArraySection)
7495 break;
7496
7497 // The pointer becomes the base for the next element.
7498 if (Next != CE)
7499 BP = IsMemberReference ? LowestElem : LB;
7500 if (!IsPartialMapped)
7501 IsExpressionFirstInfo = false;
7502 IsCaptureFirstInfo = false;
7503 FirstPointerInComplexData = false;
7504 IsPrevMemberReference = IsMemberReference;
7505 } else if (FirstPointerInComplexData) {
7506 QualType Ty = Components.rbegin()
7507 ->getAssociatedDeclaration()
7508 ->getType()
7509 .getNonReferenceType();
7510 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7511 FirstPointerInComplexData = false;
7512 }
7513 }
7514 // If we ran into the whole component, allocate the space for the whole
7515 // record.
7516 if (!EncounteredME)
7517 PartialStruct.HasCompleteRecord = true;
7518
7519 if (!IsNonContiguous)
7520 return;
7521
7522 const ASTContext &Context = CGF.getContext();
7523
7524 // For supporting stride in array section, we need to initialize the first
7525 // dimension size as 1, first offset as 0, and first count as 1
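// Illustration (hypothetical user code, not from this file): for
//   int arr[10][10];
//   #pragma omp target update to(arr[0:2:2][1:8:3])
// the runtime receives one extra leading "dummy" dimension with offset 0,
// count 1, and stride sizeof(int), followed by the two real dimensions
// collected below.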
7526 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7527 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7528 MapValuesArrayTy CurStrides;
7529 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7530 uint64_t ElementTypeSize;
7531
7532 // Collect Size information for each dimension and get the element size as
7533 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7534 // should be [10, 10] and the first stride is 4 bytes.
7535 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7536 Components) {
7537 const Expr *AssocExpr = Component.getAssociatedExpression();
7538 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7539
7540 if (!OASE)
7541 continue;
7542
7543 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7544 auto *CAT = Context.getAsConstantArrayType(Ty);
7545 auto *VAT = Context.getAsVariableArrayType(Ty);
7546
7547 // We need all the dimension sizes except for the last dimension.
7548 assert((VAT || CAT || &Component == &*Components.begin()) &&
7549 "Should be either ConstantArray or VariableArray if not the "
7550 "first Component");
7551
7552 // Get element size if CurStrides is empty.
7553 if (CurStrides.empty()) {
7554 const Type *ElementType = nullptr;
7555 if (CAT)
7556 ElementType = CAT->getElementType().getTypePtr();
7557 else if (VAT)
7558 ElementType = VAT->getElementType().getTypePtr();
7559 else
7560 assert(&Component == &*Components.begin() &&
7561 "Only expect pointer (non CAT or VAT) when this is the "
7562 "first Component");
7563 // If ElementType is null, then it means the base is a pointer
7564 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7565 // for the next iteration.
7566 if (ElementType) {
7567 // For the case of a pointer as the base, we need to remove one
7568 // level of indirection.
7569 if (&Component != &*Components.begin())
7570 ElementType = ElementType->getPointeeOrArrayElementType();
7571 ElementTypeSize =
7572 Context.getTypeSizeInChars(ElementType).getQuantity();
7573 CurStrides.push_back(
7574 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7575 }
7576 }
7577 // Get dimension value except for the last dimension since we don't need
7578 // it.
7579 if (DimSizes.size() < Components.size() - 1) {
7580 if (CAT)
7581 DimSizes.push_back(
7582 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7583 else if (VAT)
7584 DimSizes.push_back(CGF.Builder.CreateIntCast(
7585 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7586 /*IsSigned=*/false));
7587 }
7588 }
7589
7590 // Skip the dummy dimension since we already have its information.
7591 auto *DI = DimSizes.begin() + 1;
7592 // Product of dimensions.
7593 llvm::Value *DimProd =
7594 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7595
7596 // Collect info for non-contiguous. Notice that offset, count, and stride
7597 // are only meaningful for array-section, so we insert a null for anything
7598 // other than array-section.
7599 // Also, the sizes of the offsets, counts, and strides are not the same
7600 // as those of the pointers, base_pointers, sizes, or dims. Instead,
7601 // they match the number of non-contiguous declarations in the target
7602 // update to/from clause.
7603 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7604 Components) {
7605 const Expr *AssocExpr = Component.getAssociatedExpression();
7606
7607 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7608 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7609 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7610 /*isSigned=*/false);
7611 CurOffsets.push_back(Offset);
7612 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7613 CurStrides.push_back(CurStrides.back());
7614 continue;
7615 }
7616
7617 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7618
7619 if (!OASE)
7620 continue;
7621
7622 // Offset
7623 const Expr *OffsetExpr = OASE->getLowerBound();
7624 llvm::Value *Offset = nullptr;
7625 if (!OffsetExpr) {
7626 // If offset is absent, then we just set it to zero.
7627 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7628 } else {
7629 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7630 CGF.Int64Ty,
7631 /*isSigned=*/false);
7632 }
7633 CurOffsets.push_back(Offset);
7634
7635 // Count
7636 const Expr *CountExpr = OASE->getLength();
7637 llvm::Value *Count = nullptr;
7638 if (!CountExpr) {
7639 // In Clang, once a higher dimension is an array section, all the lower
7640 // dimensions are constructed as array sections as well; however, for a
7641 // case like arr[0:2][2], Clang constructs the inner dimension as an
7642 // array section even though it is not one according to the spec.
7643 if (!OASE->getColonLocFirst().isValid() &&
7644 !OASE->getColonLocSecond().isValid()) {
7645 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7646 } else {
7647 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7648 // When the length is absent it defaults to ⌈(size −
7649 // lower-bound)/stride⌉, where size is the size of the array
7650 // dimension.
7651 const Expr *StrideExpr = OASE->getStride();
7652 llvm::Value *Stride =
7653 StrideExpr
7654 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7655 CGF.Int64Ty, /*isSigned=*/false)
7656 : nullptr;
7657 if (Stride)
7658 Count = CGF.Builder.CreateUDiv(
7659 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7660 else
7661 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7662 }
7663 } else {
7664 Count = CGF.EmitScalarExpr(CountExpr);
7665 }
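// Worked instance of the default-length rule above (hypothetical values,
// not from this file): for a dimension of size 10, lower-bound 2, and
// stride 2, the length defaults to (10 - 2) / 2 = 4, i.e. the section
// covers elements 2, 4, 6, and 8.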
7666 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7667 CurCounts.push_back(Count);
7668
7669 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7670 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7671 // Offset Count Stride
7672 // D0 0 1 4 (int) <- dummy dimension
7673 // D1 0 2 8 (2 * (1) * 4)
7674 // D2 1 2 20 (1 * (1 * 5) * 4)
7675 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
7676 const Expr *StrideExpr = OASE->getStride();
7677 llvm::Value *Stride =
7678 StrideExpr
7679 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7680 CGF.Int64Ty, /*isSigned=*/false)
7681 : nullptr;
7682 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7683 if (Stride)
7684 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7685 else
7686 CurStrides.push_back(DimProd);
7687 if (DI != DimSizes.end())
7688 ++DI;
7689 }
7690
7691 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7692 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7693 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7694 }
7695
7696 /// Return the adjusted map modifiers if the declaration a capture refers to
7697 /// appears in a first-private clause. This is expected to be used only with
7698 /// directives that start with 'target'.
7699 OpenMPOffloadMappingFlags
7700 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7701 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7702
7703 // A first private variable captured by reference will use only the
7704 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7705 // declaration is known as first-private in this handler.
7706 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7707 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7708 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7709 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7710 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7711 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7712 }
7713 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7714 if (I != LambdasMap.end())
7715 // For map(to: lambda): use the user-specified map type.
7716 return getMapTypeBits(
7717 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7718 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7719 /*AddPtrFlag=*/false,
7720 /*AddIsTargetParamFlag=*/false,
7721 /*isNonContiguous=*/false);
7722 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7723 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7724 }
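// Illustration (hypothetical directive, not from this file): for a
// firstprivate array captured by reference, e.g.
//   int a[10];
//   #pragma omp target firstprivate(a)
// the branches above yield PRIVATE | TO; a firstprivate pointer captured
// by reference would instead yield TO | PTR_AND_OBJ.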
7725
7726 void getPlainLayout(const CXXRecordDecl *RD,
7727 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7728 bool AsBase) const {
7729 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7730
7731 llvm::StructType *St =
7732 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7733
7734 unsigned NumElements = St->getNumElements();
7735 llvm::SmallVector<
7736 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7737 RecordLayout(NumElements);
7738
7739 // Fill bases.
7740 for (const auto &I : RD->bases()) {
7741 if (I.isVirtual())
7742 continue;
7743
7744 QualType BaseTy = I.getType();
7745 const auto *Base = BaseTy->getAsCXXRecordDecl();
7746 // Ignore empty bases.
7747 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
7748 CGF.getContext()
7749 .getASTRecordLayout(RD)
7750 .getBaseClassOffset(Base)
7751 .isZero())
7752 continue;
7753
7754 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7755 RecordLayout[FieldIndex] = Base;
7756 }
7757 // Fill in virtual bases.
7758 for (const auto &I : RD->vbases()) {
7759 QualType BaseTy = I.getType();
7760 // Ignore empty bases.
7761 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
7762 continue;
7763
7764 const auto *Base = BaseTy->getAsCXXRecordDecl();
7765 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7766 if (RecordLayout[FieldIndex])
7767 continue;
7768 RecordLayout[FieldIndex] = Base;
7769 }
7770 // Fill in all the fields.
7771 assert(!RD->isUnion() && "Unexpected union.");
7772 for (const auto *Field : RD->fields()) {
7773 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7774 // will fill in later.)
7775 if (!Field->isBitField() &&
7776 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
7777 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7778 RecordLayout[FieldIndex] = Field;
7779 }
7780 }
7781 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7782 &Data : RecordLayout) {
7783 if (Data.isNull())
7784 continue;
7785 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7786 getPlainLayout(Base, Layout, /*AsBase=*/true);
7787 else
7788 Layout.push_back(Data.get<const FieldDecl *>());
7789 }
7790 }
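// Sketch of the flattening above (hypothetical types, not from this file):
//   struct B1 { int x; }; struct B2 { int y; };
//   struct D : B1, B2 { int z; };
// getPlainLayout on D recurses into the non-virtual bases first and fills
// Layout with [B1::x, B2::y, D::z], i.e. the flattened layout order that
// is later used to sort overlapped struct members.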
7791
7792 /// Generate all the base pointers, section pointers, sizes, map types, and
7793 /// mappers for the extracted mappable expressions (all included in \a
7794 /// CombinedInfo). Also, for each item that relates with a device pointer, a
7795 /// pair of the relevant declaration and index where it occurs is appended to
7796 /// the device pointers info array.
7797 void generateAllInfoForClauses(
7798 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
7799 llvm::OpenMPIRBuilder &OMPBuilder,
7800 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
7801 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
7802 // We have to process the component lists that relate with the same
7803 // declaration in a single chunk so that we can generate the map flags
7804 // correctly. Therefore, we organize all lists in a map.
7805 enum MapKind { Present, Allocs, Other, Total };
7806 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7807 SmallVector<SmallVector<MapInfo, 8>, 4>>
7808 Info;
7809
7810 // Helper function to fill the information map for the different supported
7811 // clauses.
7812 auto &&InfoGen =
7813 [&Info, &SkipVarSet](
7814 const ValueDecl *D, MapKind Kind,
7815 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
7816 OpenMPMapClauseKind MapType,
7817 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7818 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7819 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
7820 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
7821 if (SkipVarSet.contains(D))
7822 return;
7823 auto It = Info.find(D);
7824 if (It == Info.end())
7825 It = Info
7826 .insert(std::make_pair(
7827 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
7828 .first;
7829 It->second[Kind].emplace_back(
7830 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
7831 IsImplicit, Mapper, VarRef, ForDeviceAddr);
7832 };
7833
7834 for (const auto *Cl : Clauses) {
7835 const auto *C = dyn_cast<OMPMapClause>(Cl);
7836 if (!C)
7837 continue;
7838 MapKind Kind = Other;
7839 if (llvm::is_contained(C->getMapTypeModifiers(),
7840 OMPC_MAP_MODIFIER_present))
7841 Kind = Present;
7842 else if (C->getMapType() == OMPC_MAP_alloc)
7843 Kind = Allocs;
7844 const auto *EI = C->getVarRefs().begin();
7845 for (const auto L : C->component_lists()) {
7846 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
7847 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
7848 C->getMapTypeModifiers(), std::nullopt,
7849 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7850 E);
7851 ++EI;
7852 }
7853 }
7854 for (const auto *Cl : Clauses) {
7855 const auto *C = dyn_cast<OMPToClause>(Cl);
7856 if (!C)
7857 continue;
7858 MapKind Kind = Other;
7859 if (llvm::is_contained(C->getMotionModifiers(),
7860 OMPC_MOTION_MODIFIER_present))
7861 Kind = Present;
7862 const auto *EI = C->getVarRefs().begin();
7863 for (const auto L : C->component_lists()) {
7864 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
7865 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
7866 C->isImplicit(), std::get<2>(L), *EI);
7867 ++EI;
7868 }
7869 }
7870 for (const auto *Cl : Clauses) {
7871 const auto *C = dyn_cast<OMPFromClause>(Cl);
7872 if (!C)
7873 continue;
7874 MapKind Kind = Other;
7875 if (llvm::is_contained(C->getMotionModifiers(),
7876 OMPC_MOTION_MODIFIER_present))
7877 Kind = Present;
7878 const auto *EI = C->getVarRefs().begin();
7879 for (const auto L : C->component_lists()) {
7880 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
7881 std::nullopt, C->getMotionModifiers(),
7882 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
7883 *EI);
7884 ++EI;
7885 }
7886 }
7887
7888 // Look at the use_device_ptr and use_device_addr clauses information and
7889 // mark the existing map entries as such. If there is no map information for
7890 // an entry in the use_device_ptr and use_device_addr list, we create one
7891 // with map type 'alloc' and zero size section. It is the user's fault if
7892 // that was not mapped before. If there is no map information and the pointer is
7893 // a struct member, then we defer the emission of that entry until the whole
7894 // struct has been processed.
7895 llvm::MapVector<CanonicalDeclPtr<const Decl>,
7896 SmallVector<DeferredDevicePtrEntryTy, 4>>
7897 DeferredInfo;
7898 MapCombinedInfoTy UseDeviceDataCombinedInfo;
7899
7900 auto &&UseDeviceDataCombinedInfoGen =
7901 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
7902 CodeGenFunction &CGF, bool IsDevAddr) {
7903 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
7904 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
7905 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
7906 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
7907 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
7908 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
7909 UseDeviceDataCombinedInfo.Sizes.push_back(
7910 llvm::Constant::getNullValue(CGF.Int64Ty));
7911 UseDeviceDataCombinedInfo.Types.push_back(
7912 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
7913 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
7914 };
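// Illustration (hypothetical user code, not from this file):
//   int *p;
//   #pragma omp target data map(to: p[0:10]) use_device_ptr(p)
//   { kernel(p); }
// each use_device_ptr/use_device_addr operand yields one RETURN_PARAM
// entry like the one built above, through which the runtime hands the
// translated device address back to the code in the region.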
7915
7916 auto &&MapInfoGen =
7917 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
7918 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
7919 OMPClauseMappableExprCommon::MappableExprComponentListRef
7920 Components,
7921 bool IsImplicit, bool IsDevAddr) {
7922 // We didn't find any match in our map information, so generate a zero
7923 // size array section; if the pointer is a struct member, we defer this
7924 // action until the whole struct has been processed.
7925 if (isa<MemberExpr>(IE)) {
7926 // Insert the pointer into Info to be processed by
7927 // generateInfoForComponentList. Because it is a member pointer
7928 // without a pointee, no entry will be generated for it, therefore
7929 // we need to generate one after the whole struct has been
7930 // processed. Nonetheless, generateInfoForComponentList must be
7931 // called to take the pointer into account for the calculation of
7932 // the range of the partial struct.
7933 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
7934 std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
7935 nullptr, nullptr, IsDevAddr);
7936 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
7937 } else {
7938 llvm::Value *Ptr;
7939 if (IsDevAddr) {
7940 if (IE->isGLValue())
7941 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
7942 else
7943 Ptr = CGF.EmitScalarExpr(IE);
7944 } else {
7945 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
7946 }
7947 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
7948 }
7949 };
7950
7951 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7952 const Expr *IE, bool IsDevAddr) -> bool {
7953 // We potentially have map information for this declaration already.
7954 // Look for the first set of components that refer to it. If found,
7955 // return true.
7956 // If the first component is a member expression, we have to look into
7957 // 'this', which maps to null in the map of map information. Otherwise
7958 // look directly for the information.
7959 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7960 if (It != Info.end()) {
7961 bool Found = false;
7962 for (auto &Data : It->second) {
7963 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7964 return MI.Components.back().getAssociatedDeclaration() == VD;
7965 });
7966 // If we found a map entry, signal that the pointer has to be
7967 // returned and move on to the next declaration. Exclude cases where
7968 // the base pointer is mapped as array subscript, array section or
7969 // array shaping. The base address is passed as a pointer to base in
7970 // this case and cannot be used as a base for use_device_ptr list
7971 // item.
7972 if (CI != Data.end()) {
7973 if (IsDevAddr) {
7974 CI->ForDeviceAddr = IsDevAddr;
7975 CI->ReturnDevicePointer = true;
7976 Found = true;
7977 break;
7978 } else {
7979 auto PrevCI = std::next(CI->Components.rbegin());
7980 const auto *VarD = dyn_cast<VarDecl>(VD);
7981 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7982 isa<MemberExpr>(IE) ||
7983 !VD->getType().getNonReferenceType()->isPointerType() ||
7984 PrevCI == CI->Components.rend() ||
7985 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
7986 VarD->hasLocalStorage()) {
7987 CI->ForDeviceAddr = IsDevAddr;
7988 CI->ReturnDevicePointer = true;
7989 Found = true;
7990 break;
7991 }
7992 }
7993 }
7994 }
7995 return Found;
7996 }
7997 return false;
7998 };
7999
8000 // Look at the use_device_ptr clause information and mark the existing map
8001 // entries as such. If there is no map information for an entry in the
8002 // use_device_ptr list, we create one with map type 'alloc' and zero size
8003 // section. It is the user's fault if that was not mapped before. If there is
8004 // no map information and the pointer is a struct member, then we defer the
8005 // emission of that entry until the whole struct has been processed.
8006 for (const auto *Cl : Clauses) {
8007 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8008 if (!C)
8009 continue;
8010 for (const auto L : C->component_lists()) {
8011 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8012 std::get<1>(L);
8013 assert(!Components.empty() &&
8014 "Not expecting empty list of components!");
8015 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8016 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8017 const Expr *IE = Components.back().getAssociatedExpression();
8018 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8019 continue;
8020 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8021 /*IsDevAddr=*/false);
8022 }
8023 }
8024
8025 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8026 for (const auto *Cl : Clauses) {
8027 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8028 if (!C)
8029 continue;
8030 for (const auto L : C->component_lists()) {
8031 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8032 std::get<1>(L);
8033 assert(!std::get<1>(L).empty() &&
8034 "Not expecting empty list of components!");
8035 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8036 if (!Processed.insert(VD).second)
8037 continue;
8038 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8039 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8040 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8041 continue;
8042 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8043 /*IsDevAddr=*/true);
8044 }
8045 }
8046
8047 for (const auto &Data : Info) {
8048 StructRangeInfoTy PartialStruct;
8049 // Current struct information:
8050 MapCombinedInfoTy CurInfo;
8051 // Current struct base information:
8052 MapCombinedInfoTy StructBaseCurInfo;
8053 const Decl *D = Data.first;
8054 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8055 bool HasMapBasePtr = false;
8056 bool HasMapArraySec = false;
8057 if (VD && VD->getType()->isAnyPointerType()) {
8058 for (const auto &M : Data.second) {
8059 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8060 return isa_and_present<DeclRefExpr>(L.VarRef);
8061 });
8062 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8063 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8064 L.VarRef);
8065 });
8066 if (HasMapBasePtr && HasMapArraySec)
8067 break;
8068 }
8069 }
8070 for (const auto &M : Data.second) {
8071 for (const MapInfo &L : M) {
8072 assert(!L.Components.empty() &&
8073 "Not expecting declaration with no component lists.");
8074
8075 // Remember the current base pointer index.
8076 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8077 unsigned StructBasePointersIdx =
8078 StructBaseCurInfo.BasePointers.size();
8079 CurInfo.NonContigInfo.IsNonContiguous =
8080 L.Components.back().isNonContiguous();
8081 generateInfoForComponentList(
8082 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8083 CurInfo, StructBaseCurInfo, PartialStruct,
8084 /*IsFirstComponentList=*/false, L.IsImplicit,
8085 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8086 L.VarRef, /*OverlappedElements*/ std::nullopt,
8087 HasMapBasePtr && HasMapArraySec);
8088
8089 // If this entry relates to a device pointer, set the relevant
8090 // declaration and add the 'return pointer' flag.
8091 if (L.ReturnDevicePointer) {
8092 // Check whether a value was added to either CurInfo or
8093 // StructBaseCurInfo and error if no value was added to either of
8094 // them:
8095 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8096 StructBasePointersIdx <
8097 StructBaseCurInfo.BasePointers.size()) &&
8098 "Unexpected number of mapped base pointers.");
8099
8100 // Choose a base pointer index which is always valid:
8101 const ValueDecl *RelevantVD =
8102 L.Components.back().getAssociatedDeclaration();
8103 assert(RelevantVD &&
8104 "No relevant declaration related with device pointer??");
8105
8106 // If StructBaseCurInfo has been updated this iteration then work on
8107 // the first new entry added to it i.e. make sure that when multiple
8108 // values are added to any of the lists, the first value added is
8109 // being modified by the assignments below (not the last value
8110 // added).
8111 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8112 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8113 RelevantVD;
8114 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8115 L.ForDeviceAddr ? DeviceInfoTy::Address
8116 : DeviceInfoTy::Pointer;
8117 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8118 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8119 } else {
8120 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8121 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8122 L.ForDeviceAddr ? DeviceInfoTy::Address
8123 : DeviceInfoTy::Pointer;
8124 CurInfo.Types[CurrentBasePointersIdx] |=
8125 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8126 }
8127 }
8128 }
8129 }
8130
8131 // Append any pending zero-length pointers which are struct members and
8132 // used with use_device_ptr or use_device_addr.
8133 auto CI = DeferredInfo.find(Data.first);
8134 if (CI != DeferredInfo.end()) {
8135 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8136 llvm::Value *BasePtr;
8137 llvm::Value *Ptr;
8138 if (L.ForDeviceAddr) {
8139 if (L.IE->isGLValue())
8140 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8141 else
8142 Ptr = this->CGF.EmitScalarExpr(L.IE);
8143 BasePtr = Ptr;
8144 // Entry is RETURN_PARAM. Also, set the placeholder value
8145 // MEMBER_OF=FFFF so that the entry is later updated with the
8146 // correct value of MEMBER_OF.
8147 CurInfo.Types.push_back(
8148 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8149 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8150 } else {
8151 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8152 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8153 L.IE->getExprLoc());
8154 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8155 // placeholder value MEMBER_OF=FFFF so that the entry is later
8156 // updated with the correct value of MEMBER_OF.
8157 CurInfo.Types.push_back(
8158 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8159 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8160 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8161 }
8162 CurInfo.Exprs.push_back(L.VD);
8163 CurInfo.BasePointers.emplace_back(BasePtr);
8164 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8165 CurInfo.DevicePointers.emplace_back(
8166 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8167 CurInfo.Pointers.push_back(Ptr);
8168 CurInfo.Sizes.push_back(
8169 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8170 CurInfo.Mappers.push_back(nullptr);
8171 }
8172 }
8173
8174 // Unify entries in one list making sure the struct mapping precedes the
8175 // individual fields:
8176 MapCombinedInfoTy UnionCurInfo;
8177 UnionCurInfo.append(StructBaseCurInfo);
8178 UnionCurInfo.append(CurInfo);
8179
8180 // If there is an entry in PartialStruct it means we have a struct with
8181 // individual members mapped. Emit an extra combined entry.
8182 if (PartialStruct.Base.isValid()) {
8183 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8184 // Emit a combined entry:
8185 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8186 /*IsMapThis*/ !VD, OMPBuilder, VD);
8187 }
8188
8189 // We need to append the results of this capture to what we already have.
8190 CombinedInfo.append(UnionCurInfo);
8191 }
8192 // Append data for use_device_ptr clauses.
8193 CombinedInfo.append(UseDeviceDataCombinedInfo);
8194 }
8195
8196public:
8197 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8198 : CurDir(&Dir), CGF(CGF) {
8199 // Extract firstprivate clause information.
8200 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8201 for (const auto *D : C->varlist())
8202 FirstPrivateDecls.try_emplace(
8203 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8204 // Extract implicit firstprivates from uses_allocators clauses.
8205 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8206 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8207 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8208 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8209 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8210 /*Implicit=*/true);
8211 else if (const auto *VD = dyn_cast<VarDecl>(
8212 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8213 ->getDecl()))
8214 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8215 }
8216 }
8217 // Extract device pointer clause information.
8218 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8219 for (auto L : C->component_lists())
8220 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8221 // Extract device addr clause information.
8222 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8223 for (auto L : C->component_lists())
8224 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8225 // Extract map information.
8226 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8227 if (C->getMapType() != OMPC_MAP_to)
8228 continue;
8229 for (auto L : C->component_lists()) {
8230 const ValueDecl *VD = std::get<0>(L);
8231 const auto *RD = VD ? VD->getType()
8232 .getCanonicalType()
8233 .getNonReferenceType()
8234 ->getAsCXXRecordDecl()
8235 : nullptr;
8236 if (RD && RD->isLambda())
8237 LambdasMap.try_emplace(std::get<0>(L), C);
8238 }
8239 }
8240 }
8241
8242 /// Constructor for the declare mapper directive.
8243 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8244 : CurDir(&Dir), CGF(CGF) {}
8245
8246 /// Generate code for the combined entry if we have a partially mapped struct
8247 /// and take care of the mapping flags of the arguments corresponding to
8248 /// individual struct members.
8249 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8250 MapFlagsArrayTy &CurTypes,
8251 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8252 llvm::OpenMPIRBuilder &OMPBuilder,
8253 const ValueDecl *VD = nullptr,
8254 bool NotTargetParams = true) const {
8255 if (CurTypes.size() == 1 &&
8256 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8257 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8258 !PartialStruct.IsArraySection)
8259 return;
8260 Address LBAddr = PartialStruct.LowestElem.second;
8261 Address HBAddr = PartialStruct.HighestElem.second;
8262 if (PartialStruct.HasCompleteRecord) {
8263 LBAddr = PartialStruct.LB;
8264 HBAddr = PartialStruct.LB;
8265 }
8266 CombinedInfo.Exprs.push_back(VD);
8267 // Base is the base of the struct
8268 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8269 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8270 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8271 // Pointer is the address of the lowest element
8272 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8273 const CXXMethodDecl *MD =
8274 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8275 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8276 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8277 // There should not be a mapper for a combined entry.
8278 if (HasBaseClass) {
8279 // OpenMP 5.2 148:21:
8280 // If the target construct is within a class non-static member function,
8281 // and a variable is an accessible data member of the object for which the
8282 // non-static data member function is invoked, the variable is treated as
8283 // if the this[:1] expression had appeared in a map clause with a map-type
8284 // of tofrom.
8285 // Emit this[:1]
8286 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8287 QualType Ty = MD->getFunctionObjectParameterType();
8288 llvm::Value *Size =
8289 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8290 /*isSigned=*/true);
8291 CombinedInfo.Sizes.push_back(Size);
8292 } else {
8293 CombinedInfo.Pointers.push_back(LB);
8294 // Size is (addr of {highest+1} element) - (addr of lowest element)
8295 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8296 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8297 HBAddr.getElementType(), HB, /*Idx0=*/1);
8298 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8299 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8300 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8301 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8302 /*isSigned=*/false);
8303 CombinedInfo.Sizes.push_back(Size);
8304 }
8305 CombinedInfo.Mappers.push_back(nullptr);
8306 // Map type is always TARGET_PARAM, if generating info for captures.
8307 CombinedInfo.Types.push_back(
8308 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8309 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
8310 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
8311 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8312 // If any element has the present modifier, then make sure the runtime
8313 // doesn't attempt to allocate the struct.
8314 if (CurTypes.end() !=
8315 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8316 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8317 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8318 }))
8319 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8320 // Remove TARGET_PARAM flag from the first element
8321 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8322 // If any element has the ompx_hold modifier, then make sure the runtime
8323 // uses the hold reference count for the struct as a whole so that it won't
8324 // be unmapped by an extra dynamic reference count decrement. Add it to all
8325 // elements as well so the runtime knows which reference count to check
8326 // when determining whether it's time for device-to-host transfers of
8327 // individual elements.
8328 if (CurTypes.end() !=
8329 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8330 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8331 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8332 })) {
8333 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8334 for (auto &M : CurTypes)
8335 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8336 }
8337
8338 // All other current entries will be MEMBER_OF the combined entry
8339 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8340 // 0xFFFF in the MEMBER_OF field).
8341 OpenMPOffloadMappingFlags MemberOfFlag =
8342 OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8343 for (auto &M : CurTypes)
8344 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8345 }
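// Illustration (hypothetical user code, not from this file):
//   struct S { int a; double b; } s;
//   #pragma omp target map(tofrom: s.a, s.b)
// emits one combined entry whose pointer is &s.a and whose size reaches
// one element past &s.b, computed as above, while the individual member
// entries are then tagged with the MEMBER_OF index of that entry.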
8346
8347 /// Generate all the base pointers, section pointers, sizes, map types, and
8348 /// mappers for the extracted mappable expressions (all included in \a
8349 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8350 /// pair of the relevant declaration and index where it occurs is appended to
8351 /// the device pointers info array.
8352 void generateAllInfo(
8353 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8354 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8355 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8356 assert(CurDir.is<const OMPExecutableDirective *>() &&
8357 "Expect an executable directive");
8358 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8359 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8360 SkipVarSet);
8361 }
8362
8363 /// Generate all the base pointers, section pointers, sizes, map types, and
8364 /// mappers for the extracted map clauses of user-defined mapper (all included
8365 /// in \a CombinedInfo).
8366 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8367 llvm::OpenMPIRBuilder &OMPBuilder) const {
8368 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8369 "Expect a declare mapper directive");
8370 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8371 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8372 OMPBuilder);
8373 }
8374
8375 /// Emit capture info for lambdas for variables captured by reference.
8376 void generateInfoForLambdaCaptures(
8377 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8378 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8379 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8380 const auto *RD = VDType->getAsCXXRecordDecl();
8381 if (!RD || !RD->isLambda())
8382 return;
8383 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8384 CGF.getContext().getDeclAlign(VD));
8385 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8386 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8387 FieldDecl *ThisCapture = nullptr;
8388 RD->getCaptureFields(Captures, ThisCapture);
8389 if (ThisCapture) {
8390 LValue ThisLVal =
8391 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8392 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8393 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8394 VDLVal.getPointer(CGF));
8395 CombinedInfo.Exprs.push_back(VD);
8396 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8397 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8398 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8399 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8400 CombinedInfo.Sizes.push_back(
8401 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8402 CGF.Int64Ty, /*isSigned=*/true));
8403 CombinedInfo.Types.push_back(
8404 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8405 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8406 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8407 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8408 CombinedInfo.Mappers.push_back(nullptr);
8409 }
8410 for (const LambdaCapture &LC : RD->captures()) {
8411 if (!LC.capturesVariable())
8412 continue;
8413 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8414 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8415 continue;
8416 auto It = Captures.find(VD);
8417 assert(It != Captures.end() && "Found lambda capture without field.");
8418 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8419 if (LC.getCaptureKind() == LCK_ByRef) {
8420 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8421 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8422 VDLVal.getPointer(CGF));
8423 CombinedInfo.Exprs.push_back(VD);
8424 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8425 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8426 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8427 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8428 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8429 CGF.getTypeSize(
8430 VD->getType().getCanonicalType().getNonReferenceType()),
8431 CGF.Int64Ty, /*isSigned=*/true));
8432 } else {
8433 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8434 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8435 VDLVal.getPointer(CGF));
8436 CombinedInfo.Exprs.push_back(VD);
8437 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8438 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8439 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8440 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8441 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8442 }
8443 CombinedInfo.Types.push_back(
8444 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8445 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8446 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8447 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8448 CombinedInfo.Mappers.push_back(nullptr);
8449 }
8450 }
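// Sketch (hypothetical user code, not from this file):
//   int x = 0; auto l = [&x]() { return x; };
//   #pragma omp target map(to: l)
// besides the lambda object itself, the loop above emits one extra
// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT entry per by-reference
// capture, so the device copy of l sees a usable pointer for x.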
8451
8452 /// Set correct indices for lambdas captures.
8453 void adjustMemberOfForLambdaCaptures(
8454 llvm::OpenMPIRBuilder &OMPBuilder,
8455 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8456 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8457 MapFlagsArrayTy &Types) const {
8458 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8459 // Set correct member_of idx for all implicit lambda captures.
8460 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8461 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8462 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8463 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8464 continue;
8465 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8466 assert(BasePtr && "Unable to find base lambda address.");
8467 int TgtIdx = -1;
8468 for (unsigned J = I; J > 0; --J) {
8469 unsigned Idx = J - 1;
8470 if (Pointers[Idx] != BasePtr)
8471 continue;
8472 TgtIdx = Idx;
8473 break;
8474 }
8475 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8476 // All other current entries will be MEMBER_OF the combined entry
8477 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8478 // 0xFFFF in the MEMBER_OF field).
8479 OpenMPOffloadMappingFlags MemberOfFlag =
8480 OMPBuilder.getMemberOfFlag(TgtIdx);
8481 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8482 }
8483 }
8484
8485 /// Generate the base pointers, section pointers, sizes, map types, and
8486 /// mappers associated to a given capture (all included in \a CombinedInfo).
8487 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8488 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8489 StructRangeInfoTy &PartialStruct) const {
8490 assert(!Cap->capturesVariableArrayType() &&
8491 "Not expecting to generate map info for a variable array type!");
8492
8493 // We need to know when we are generating information for the first component.
8494 const ValueDecl *VD = Cap->capturesThis()
8495 ? nullptr
8496 : Cap->getCapturedVar()->getCanonicalDecl();
8497
8498 // for map(to: lambda): skip here, processing it in
8499 // generateDefaultMapInfo
8500 if (LambdasMap.count(VD))
8501 return;
8502
8503 // If this declaration appears in an is_device_ptr clause, we just have to
8504 // pass the pointer by value. If it is a reference to a declaration, we just
8505 // pass its value.
8506 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8507 CombinedInfo.Exprs.push_back(VD);
8508 CombinedInfo.BasePointers.emplace_back(Arg);
8509 CombinedInfo.DevicePtrDecls.emplace_back(VD);
8510 CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8511 CombinedInfo.Pointers.push_back(Arg);
8512 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8513 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8514 /*isSigned=*/true));
8515 CombinedInfo.Types.push_back(
8516 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8517 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8518 CombinedInfo.Mappers.push_back(nullptr);
8519 return;
8520 }
8521
8522 using MapData =
8523 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8524 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8525 const ValueDecl *, const Expr *>;
8526 SmallVector<MapData, 4> DeclComponentLists;
8527 // For member fields list in is_device_ptr, store it in
8528 // DeclComponentLists for generating components info.
8529 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8530 auto It = DevPointersMap.find(VD);
8531 if (It != DevPointersMap.end())
8532 for (const auto &MCL : It->second)
8533 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8534 /*IsImplicit = */ true, nullptr,
8535 nullptr);
8536 auto I = HasDevAddrsMap.find(VD);
8537 if (I != HasDevAddrsMap.end())
8538 for (const auto &MCL : I->second)
8539 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8540 /*IsImplicit = */ true, nullptr,
8541 nullptr);
8542 assert(CurDir.is<const OMPExecutableDirective *>() &&
8543 "Expect a executable directive");
8544 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8545 bool HasMapBasePtr = false;
8546 bool HasMapArraySec = false;
8547 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8548 const auto *EI = C->getVarRefs().begin();
8549 for (const auto L : C->decl_component_lists(VD)) {
8550 const ValueDecl *VDecl, *Mapper;
8551 // The expression is not valid if the mapping is implicit.
8552 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8553 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8554 std::tie(VDecl, Components, Mapper) = L;
8555 assert(VDecl == VD && "We got information for the wrong declaration??");
8556 assert(!Components.empty() &&
8557 "Not expecting declaration with no component lists.");
8558 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8559 HasMapBasePtr = true;
8560 if (VD && E && VD->getType()->isAnyPointerType() &&
8561 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
8562 HasMapArraySec = true;
8563 DeclComponentLists.emplace_back(Components, C->getMapType(),
8564 C->getMapTypeModifiers(),
8565 C->isImplicit(), Mapper, E);
8566 ++EI;
8567 }
8568 }
8569 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8570 const MapData &RHS) {
8571 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8572 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8573 bool HasPresent =
8574 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8575 bool HasAllocs = MapType == OMPC_MAP_alloc;
8576 MapModifiers = std::get<2>(RHS);
8577 MapType = std::get<1>(LHS);
8578 bool HasPresentR =
8579 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8580 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8581 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8582 });
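// E.g. (hypothetical clauses, not from this file) for
//   map(alloc: s.a) map(present, to: s.b) map(from: s.c)
// the stable sort above moves the 'present' entry to the front and the
// 'alloc' entry to the back, so presence checks are processed first.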
8583
8584 // Find overlapping elements (including the offset from the base element).
8585 llvm::SmallDenseMap<
8586 const MapData *,
8587 llvm::SmallVector<
8588 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8589 4>
8590 OverlappedData;
8591 size_t Count = 0;
8592 for (const MapData &L : DeclComponentLists) {
8593 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8594 OpenMPMapClauseKind MapType;
8595 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8596 bool IsImplicit;
8597 const ValueDecl *Mapper;
8598 const Expr *VarRef;
8599 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8600 L;
8601 ++Count;
8602 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8603 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8604 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8605 VarRef) = L1;
8606 auto CI = Components.rbegin();
8607 auto CE = Components.rend();
8608 auto SI = Components1.rbegin();
8609 auto SE = Components1.rend();
8610 for (; CI != CE && SI != SE; ++CI, ++SI) {
8611 if (CI->getAssociatedExpression()->getStmtClass() !=
8612 SI->getAssociatedExpression()->getStmtClass())
8613 break;
8614 // Are we dealing with different variables/fields?
8615 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8616 break;
8617 }
8618 // We found an overlap if, for at least one of the lists, we reached
8619 // the head of the components list.
8620 if (CI == CE || SI == SE) {
8621 // Ignore it if it is the same component.
8622 if (CI == CE && SI == SE)
8623 continue;
8624 const auto It = (SI == SE) ? CI : SI;
8625 // If one component is a pointer and another one is a kind of
8626 // dereference of this pointer (array subscript, section, dereference,
8627 // etc.), it is not an overlap.
8628 // The same holds if one component is a base and the other component is
8629 // a dereferenced pointer MemberExpr with the same base.
8630 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8631 (std::prev(It)->getAssociatedDeclaration() &&
8632 std::prev(It)
8633 ->getAssociatedDeclaration()
8634 ->getType()
8635 ->isPointerType()) ||
8636 (It->getAssociatedDeclaration() &&
8637 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8638 std::next(It) != CE && std::next(It) != SE))
8639 continue;
8640 const MapData &BaseData = CI == CE ? L : L1;
8641 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8642 SI == SE ? Components : Components1;
8643 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8644 OverlappedElements.getSecond().push_back(SubData);
8645 }
8646 }
8647 }
8648 // Sort the overlapped elements for each item.
8649 llvm::SmallVector<const FieldDecl *, 4> Layout;
8650 if (!OverlappedData.empty()) {
8651 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8652 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8653 while (BaseType != OrigType) {
8654 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8655 OrigType = BaseType->getPointeeOrArrayElementType();
8656 }
8657
8658 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8659 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8660 else {
8661 const auto *RD = BaseType->getAsRecordDecl();
8662 Layout.append(RD->field_begin(), RD->field_end());
8663 }
8664 }
8665 for (auto &Pair : OverlappedData) {
8666 llvm::stable_sort(
8667 Pair.getSecond(),
8668 [&Layout](
8669 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8670 OMPClauseMappableExprCommon::MappableExprComponentListRef
8671 Second) {
8672 auto CI = First.rbegin();
8673 auto CE = First.rend();
8674 auto SI = Second.rbegin();
8675 auto SE = Second.rend();
8676 for (; CI != CE && SI != SE; ++CI, ++SI) {
8677 if (CI->getAssociatedExpression()->getStmtClass() !=
8678 SI->getAssociatedExpression()->getStmtClass())
8679 break;
8680 // Are we dealing with different variables/fields?
8681 if (CI->getAssociatedDeclaration() !=
8682 SI->getAssociatedDeclaration())
8683 break;
8684 }
8685
8686 // Lists contain the same elements.
8687 if (CI == CE && SI == SE)
8688 return false;
8689
8690 // A list with fewer elements is less than a list with more elements.
8691 if (CI == CE || SI == SE)
8692 return CI == CE;
8693
8694 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8695 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8696 if (FD1->getParent() == FD2->getParent())
8697 return FD1->getFieldIndex() < FD2->getFieldIndex();
8698 const auto *It =
8699 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8700 return FD == FD1 || FD == FD2;
8701 });
8702 return *It == FD1;
8703 });
8704 }
8705
8706 // Associated with a capture, because the mapping flags depend on it.
8707 // Go through all of the elements that have overlapped elements first.
8708 bool IsFirstComponentList = true;
8709 MapCombinedInfoTy StructBaseCombinedInfo;
8710 for (const auto &Pair : OverlappedData) {
8711 const MapData &L = *Pair.getFirst();
8712 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8713 OpenMPMapClauseKind MapType;
8714 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8715 bool IsImplicit;
8716 const ValueDecl *Mapper;
8717 const Expr *VarRef;
8718 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8719 L;
8720 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8721 OverlappedComponents = Pair.getSecond();
8722 generateInfoForComponentList(
8723 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8724 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8725 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8726 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8727 IsFirstComponentList = false;
8728 }
8729 // Go through other elements without overlapped elements.
8730 for (const MapData &L : DeclComponentLists) {
8731 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8732 OpenMPMapClauseKind MapType;
8733 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8734 bool IsImplicit;
8735 const ValueDecl *Mapper;
8736 const Expr *VarRef;
8737 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8738 L;
8739 auto It = OverlappedData.find(&L);
8740 if (It == OverlappedData.end())
8741 generateInfoForComponentList(
8742 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8743 StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
8744 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
8745 /*ForDeviceAddr=*/false, VD, VarRef,
8746 /*OverlappedElements*/ std::nullopt,
8747 HasMapBasePtr && HasMapArraySec);
8748 IsFirstComponentList = false;
8749 }
8750 }
8751
8752 /// Generate the default map information for a given capture \a CI,
8753 /// record field declaration \a RI and captured value \a CV.
8754 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8755 const FieldDecl &RI, llvm::Value *CV,
8756 MapCombinedInfoTy &CombinedInfo) const {
8757 bool IsImplicit = true;
8758 // Do the default mapping.
8759 if (CI.capturesThis()) {
8760 CombinedInfo.Exprs.push_back(nullptr);
8761 CombinedInfo.BasePointers.push_back(CV);
8762 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8763 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8764 CombinedInfo.Pointers.push_back(CV);
8765 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8766 CombinedInfo.Sizes.push_back(
8767 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8768 CGF.Int64Ty, /*isSigned=*/true));
8769 // Default map type.
8770 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8771 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8772 } else if (CI.capturesVariableByCopy()) {
8773 const VarDecl *VD = CI.getCapturedVar();
8774 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8775 CombinedInfo.BasePointers.push_back(CV);
8776 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8777 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8778 CombinedInfo.Pointers.push_back(CV);
8779 if (!RI.getType()->isAnyPointerType()) {
8780 // We have to signal to the runtime captures passed by value that are
8781 // not pointers.
8782 CombinedInfo.Types.push_back(
8783 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8784 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8785 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8786 } else {
8787 // Pointers are implicitly mapped with a zero size and no flags
8788 // (other than first map that is added for all implicit maps).
8789 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8790 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8791 }
8792 auto I = FirstPrivateDecls.find(VD);
8793 if (I != FirstPrivateDecls.end())
8794 IsImplicit = I->getSecond();
8795 } else {
8796 assert(CI.capturesVariable() && "Expected captured reference.");
8797 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8798 QualType ElementType = PtrTy->getPointeeType();
8799 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8800 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8801 // The default map type for a scalar/complex type is 'to' because by
8802 // default the value doesn't have to be retrieved. For an aggregate
8803 // type, the default is 'tofrom'.
8804 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8805 const VarDecl *VD = CI.getCapturedVar();
8806 auto I = FirstPrivateDecls.find(VD);
8807 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8808 CombinedInfo.BasePointers.push_back(CV);
8809 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8810 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8811 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8812 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8813 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8814 AlignmentSource::Decl));
8815 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
8816 } else {
8817 CombinedInfo.Pointers.push_back(CV);
8818 }
8819 if (I != FirstPrivateDecls.end())
8820 IsImplicit = I->getSecond();
8821 }
8822 // Every default map produces a single argument which is a target parameter.
8823 CombinedInfo.Types.back() |=
8824 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8825
8826 // Add flag stating this is an implicit map.
8827 if (IsImplicit)
8828 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8829
8830 // No user-defined mapper for default mapping.
8831 CombinedInfo.Mappers.push_back(nullptr);
8832 }
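// Default-mapping sketch (hypothetical captures, not from this file): in
//   struct S { int x; } s; int n; int *p;
//   #pragma omp target
//   { s.x += n + p[0]; }
// 'n' (captured by copy, non-pointer) becomes a LITERAL entry, 'p' a
// zero-size entry with no motion flags, and 's' (captured by reference)
// an aggregate TOFROM entry; every entry also gets TARGET_PARAM and,
// being implicit here, IMPLICIT.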
8833};
8834} // anonymous namespace
8835
8836// Try to extract the base declaration from a `this->x` expression if possible.
8837 static const ValueDecl *getDeclFromThisExpr(const Expr *E) {
8838 if (!E)
8839 return nullptr;
8840
8841 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
8842 if (const MemberExpr *ME =
8843 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
8844 return ME->getMemberDecl();
8845 return nullptr;
8846}
8847
8848/// Emit a string constant containing the names of the values mapped to the
8849/// offloading runtime library.
8850llvm::Constant *
8851emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
8852 MappableExprsHandler::MappingExprInfo &MapExprs) {
8853
8854 uint32_t SrcLocStrSize;
8855 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
8856 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8857
8858 SourceLocation Loc;
8859 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
8860 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
8861 Loc = VD->getLocation();
8862 else
8863 Loc = MapExprs.getMapExpr()->getExprLoc();
8864 } else {
8865 Loc = MapExprs.getMapDecl()->getLocation();
8866 }
8867
8868 std::string ExprName;
8869 if (MapExprs.getMapExpr()) {
8870 PrintingPolicy P(CGF.getContext().getLangOpts());
8871 llvm::raw_string_ostream OS(ExprName);
8872 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
8873 OS.flush();
8874 } else {
8875 ExprName = MapExprs.getMapDecl()->getNameAsString();
8876 }
8877
8878 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
8879 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
8880 PLoc.getLine(), PLoc.getColumn(),
8881 SrcLocStrSize);
8882}
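// The emitted constant is the usual OpenMPIRBuilder source-location string,
// roughly ";<file>;<expr or decl name>;<line>;<column>;;" (an informal sketch;
// the exact layout is owned by getOrCreateSrcLocStr).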
8883/// Emit the arrays used to pass the captures and map information to the
8884/// offloading runtime library. If there is no map or capture information,
8885/// return nullptr by reference.
8886static void emitOffloadingArraysAndArgs(
8887 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8888 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
8889 bool IsNonContiguous = false, bool ForEndCall = false) {
8890 CodeGenModule &CGM = CGF.CGM;
8891
8892 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
8893 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
8894 CGF.AllocaInsertPt->getIterator());
8895 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
8896 CGF.Builder.GetInsertPoint());
8897
8898 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
8899 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
8900 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
8901 }
8902 };
8903
8904 auto CustomMapperCB = [&](unsigned int I) {
8905 llvm::Value *MFunc = nullptr;
8906 if (CombinedInfo.Mappers[I]) {
8907 Info.HasMapper = true;
8908 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
8909 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
8910 }
8911 return MFunc;
8912 };
8913 OMPBuilder.emitOffloadingArraysAndArgs(
8914 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous,
8915 ForEndCall, DeviceAddrCB, CustomMapperCB);
8916}
8917
8918/// Check for inner distribute directive.
8919static const OMPExecutableDirective *
8920getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
8921 const auto *CS = D.getInnermostCapturedStmt();
8922 const auto *Body =
8923 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
8924 const Stmt *ChildStmt =
8925 CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8926
8927 if (const auto *NestedDir =
8928 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8929 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8930 switch (D.getDirectiveKind()) {
8931 case OMPD_target:
8932 // For now, treat 'target' with nested 'teams loop' as if it's
8933 // distributed (target teams distribute).
8934 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
8935 return NestedDir;
8936 if (DKind == OMPD_teams) {
8937 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8938 /*IgnoreCaptured=*/true);
8939 if (!Body)
8940 return nullptr;
8941 ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
8942 if (const auto *NND =
8943 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8944 DKind = NND->getDirectiveKind();
8945 if (isOpenMPDistributeDirective(DKind))
8946 return NND;
8947 }
8948 }
8949 return nullptr;
8950 case OMPD_target_teams:
8951 if (isOpenMPDistributeDirective(DKind))
8952 return NestedDir;
8953 return nullptr;
8954 case OMPD_target_parallel:
8955 case OMPD_target_simd:
8956 case OMPD_target_parallel_for:
8957 case OMPD_target_parallel_for_simd:
8958 return nullptr;
8959 case OMPD_target_teams_distribute:
8960 case OMPD_target_teams_distribute_simd:
8961 case OMPD_target_teams_distribute_parallel_for:
8962 case OMPD_target_teams_distribute_parallel_for_simd:
8963 case OMPD_parallel:
8964 case OMPD_for:
8965 case OMPD_parallel_for:
8966 case OMPD_parallel_master:
8967 case OMPD_parallel_sections:
8968 case OMPD_for_simd:
8969 case OMPD_parallel_for_simd:
8970 case OMPD_cancel:
8971 case OMPD_cancellation_point:
8972 case OMPD_ordered:
8973 case OMPD_threadprivate:
8974 case OMPD_allocate:
8975 case OMPD_task:
8976 case OMPD_simd:
8977 case OMPD_tile:
8978 case OMPD_unroll:
8979 case OMPD_sections:
8980 case OMPD_section:
8981 case OMPD_single:
8982 case OMPD_master:
8983 case OMPD_critical:
8984 case OMPD_taskyield:
8985 case OMPD_barrier:
8986 case OMPD_taskwait:
8987 case OMPD_taskgroup:
8988 case OMPD_atomic:
8989 case OMPD_flush:
8990 case OMPD_depobj:
8991 case OMPD_scan:
8992 case OMPD_teams:
8993 case OMPD_target_data:
8994 case OMPD_target_exit_data:
8995 case OMPD_target_enter_data:
8996 case OMPD_distribute:
8997 case OMPD_distribute_simd:
8998 case OMPD_distribute_parallel_for:
8999 case OMPD_distribute_parallel_for_simd:
9000 case OMPD_teams_distribute:
9001 case OMPD_teams_distribute_simd:
9002 case OMPD_teams_distribute_parallel_for:
9003 case OMPD_teams_distribute_parallel_for_simd:
9004 case OMPD_target_update:
9005 case OMPD_declare_simd:
9006 case OMPD_declare_variant:
9007 case OMPD_begin_declare_variant:
9008 case OMPD_end_declare_variant:
9009 case OMPD_declare_target:
9010 case OMPD_end_declare_target:
9011 case OMPD_declare_reduction:
9012 case OMPD_declare_mapper:
9013 case OMPD_taskloop:
9014 case OMPD_taskloop_simd:
9015 case OMPD_master_taskloop:
9016 case OMPD_master_taskloop_simd:
9017 case OMPD_parallel_master_taskloop:
9018 case OMPD_parallel_master_taskloop_simd:
9019 case OMPD_requires:
9020 case OMPD_metadirective:
9021 case OMPD_unknown:
9022 default:
9023 llvm_unreachable("Unexpected directive.");
9024 }
9025 }
9026
9027 return nullptr;
9028}
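// Illustrative match (assumed example): given
// \code
// #pragma omp target
// #pragma omp teams
// #pragma omp distribute parallel for
//   for (int I = 0; I < N; ++I)
//     Body(I);
// \endcode
// the nested 'distribute parallel for' directive is returned; nestings not
// listed in the switch above yield nullptr.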
9029
9030/// Emit the user-defined mapper function. The code generation follows the
9031/// pattern in the example below.
9032/// \code
9033/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9034/// void *base, void *begin,
9035/// int64_t size, int64_t type,
9036/// void *name = nullptr) {
9037/// // Allocate space for an array section first or add a base/begin for
9038/// // pointer dereference.
9039/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9040/// !maptype.IsDelete)
9041/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9042/// size*sizeof(Ty), clearToFromMember(type));
9043/// // Map members.
9044/// for (unsigned i = 0; i < size; i++) {
9045/// // For each component specified by this mapper:
9046/// for (auto c : begin[i]->all_components) {
9047/// if (c.hasMapper())
9048/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9049/// c.arg_type, c.arg_name);
9050/// else
9051/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9052/// c.arg_begin, c.arg_size, c.arg_type,
9053/// c.arg_name);
9054/// }
9055/// }
9056/// // Delete the array section.
9057/// if (size > 1 && maptype.IsDelete)
9058/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9059/// size*sizeof(Ty), clearToFromMember(type));
9060/// }
9061/// \endcode
9062void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9063 CodeGenFunction *CGF) {
9064 if (UDMMap.count(D) > 0)
9065 return;
9066 ASTContext &C = CGM.getContext();
9067 QualType Ty = D->getType();
9068 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9069 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9070 auto *MapperVarDecl =
9071 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9072 SourceLocation Loc = D->getLocation();
9073 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9074 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9075
9076 // Prepare mapper function arguments and attributes.
9077 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9078 C.VoidPtrTy, ImplicitParamKind::Other);
9079 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9080 ImplicitParamKind::Other);
9081 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9082 C.VoidPtrTy, ImplicitParamKind::Other);
9083 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9084 ImplicitParamKind::Other);
9085 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9086 ImplicitParamKind::Other);
9087 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9088 ImplicitParamKind::Other);
9089 FunctionArgList Args;
9090 Args.push_back(&HandleArg);
9091 Args.push_back(&BaseArg);
9092 Args.push_back(&BeginArg);
9093 Args.push_back(&SizeArg);
9094 Args.push_back(&TypeArg);
9095 Args.push_back(&NameArg);
9096 const CGFunctionInfo &FnInfo =
9097 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9098 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9099 SmallString<64> TyStr;
9100 llvm::raw_svector_ostream Out(TyStr);
9101 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9102 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9103 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9104 Name, &CGM.getModule());
9105 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9106 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9107 // Start the mapper function code generation.
9108 CodeGenFunction MapperCGF(CGM);
9109 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9110 // Compute the starting and end addresses of array elements.
9111 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9112 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9113 C.getPointerType(Int64Ty), Loc);
9114 // Prepare common arguments for array initialization and deletion.
9115 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9116 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9117 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9118 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9119 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9120 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9121 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9122 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9123 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9124 // Convert the size in bytes into the number of array elements.
9125 Size = MapperCGF.Builder.CreateExactUDiv(
9126 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9127 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9128 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9129 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9130 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9131 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9132 C.getPointerType(Int64Ty), Loc);
9133 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9134 MapperCGF.GetAddrOfLocalVar(&NameArg),
9135 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9136
9137 // Emit array initialization if this is an array section and \p MapType
9138 // indicates that memory allocation is required.
9139 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9140 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9141 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9142
9143 // Emit a for loop to iterate over the SizeArg elements and map each of them.
9144
9145 // Emit the loop header block.
9146 MapperCGF.EmitBlock(HeadBB);
9147 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9148 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9149 // Evaluate whether the initial condition is satisfied.
9150 llvm::Value *IsEmpty =
9151 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9152 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9153 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9154
9155 // Emit the loop body block.
9156 MapperCGF.EmitBlock(BodyBB);
9157 llvm::BasicBlock *LastBB = BodyBB;
9158 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9159 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9160 PtrPHI->addIncoming(PtrBegin, EntryBB);
9161 Address PtrCurrent(PtrPHI, ElemTy,
9162 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9163 .getAlignment()
9164 .alignmentOfArrayElement(ElementSize));
9165 // Privatize the declared variable of mapper to be the current array element.
9166 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9167 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9168 (void)Scope.Privatize();
9169
9170 // Get map clause information. Fill up the arrays with all mapped variables.
9171 MappableExprsHandler::MapCombinedInfoTy Info;
9172 MappableExprsHandler MEHandler(*D, MapperCGF);
9173 MEHandler.generateAllInfoForMapper(Info, OMPBuilder);
9174
9175 // Call the runtime API __tgt_mapper_num_components to get the number of
9176 // pre-existing components.
9177 llvm::Value *OffloadingArgs[] = {Handle};
9178 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9179 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9180 OMPRTL___tgt_mapper_num_components),
9181 OffloadingArgs);
9182 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9183 PreviousSize,
9184 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9185
9186 // Fill up the runtime mapper handle for all components.
9187 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9188 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9189 Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9190 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9191 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9192 llvm::Value *CurSizeArg = Info.Sizes[I];
9193 llvm::Value *CurNameArg =
9194 (CGM.getCodeGenOpts().getDebugInfo() ==
9195 llvm::codegenoptions::NoDebugInfo)
9196 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9197 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9198
9199 // Extract the MEMBER_OF field from the map type.
9200 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9201 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9202 Info.Types[I]));
9203 llvm::Value *MemberMapType =
9204 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9205
9206 // Combine the map type inherited from user-defined mapper with that
9207 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
9208 // bits of the \a MapType, which is the input argument of the mapper
9209 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
9210 // bits of MemberMapType.
9211 // [OpenMP 5.0], 1.2.6. map-type decay.
9212 // | alloc | to | from | tofrom | release | delete
9213 // ----------------------------------------------------------
9214 // alloc | alloc | alloc | alloc | alloc | release | delete
9215 // to | alloc | to | alloc | to | release | delete
9216 // from | alloc | alloc | from | from | release | delete
9217 // tofrom | alloc | to | from | tofrom | release | delete
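 // For instance (an informal reading of the table, added for illustration):
 // a mapper component declared 'to' that is reached from a 'from' map clause
 // decays to 'alloc', while 'tofrom' in either position preserves the other
 // side's transfer direction.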
9218 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9219 MapType,
9220 MapperCGF.Builder.getInt64(
9221 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9222 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9223 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9224 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9225 llvm::BasicBlock *AllocElseBB =
9226 MapperCGF.createBasicBlock("omp.type.alloc.else");
9227 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9228 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9229 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9230 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9231 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9232 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9233 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9234 MapperCGF.EmitBlock(AllocBB);
9235 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9236 MemberMapType,
9237 MapperCGF.Builder.getInt64(
9238 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9239 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9240 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9241 MapperCGF.Builder.CreateBr(EndBB);
9242 MapperCGF.EmitBlock(AllocElseBB);
9243 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9244 LeftToFrom,
9245 MapperCGF.Builder.getInt64(
9246 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9247 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9248 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9249 // In case of to, clear OMP_MAP_FROM.
9250 MapperCGF.EmitBlock(ToBB);
9251 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9252 MemberMapType,
9253 MapperCGF.Builder.getInt64(
9254 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9255 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9256 MapperCGF.Builder.CreateBr(EndBB);
9257 MapperCGF.EmitBlock(ToElseBB);
9258 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9259 LeftToFrom,
9260 MapperCGF.Builder.getInt64(
9261 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9262 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9263 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9264 // In case of from, clear OMP_MAP_TO.
9265 MapperCGF.EmitBlock(FromBB);
9266 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9267 MemberMapType,
9268 MapperCGF.Builder.getInt64(
9269 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9270 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9271 // In case of tofrom, do nothing.
9272 MapperCGF.EmitBlock(EndBB);
9273 LastBB = EndBB;
9274 llvm::PHINode *CurMapType =
9275 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9276 CurMapType->addIncoming(AllocMapType, AllocBB);
9277 CurMapType->addIncoming(ToMapType, ToBB);
9278 CurMapType->addIncoming(FromMapType, FromBB);
9279 CurMapType->addIncoming(MemberMapType, ToElseBB);
9280
9281 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9282 CurSizeArg, CurMapType, CurNameArg};
9283 if (Info.Mappers[I]) {
9284 // Call the corresponding mapper function.
9285 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9286 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9287 assert(MapperFunc && "Expected a valid mapper function to be available.");
9288 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9289 } else {
9290 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9291 // data structure.
9292 MapperCGF.EmitRuntimeCall(
9293 OMPBuilder.getOrCreateRuntimeFunction(
9294 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9295 OffloadingArgs);
9296 }
9297 }
9298
9299 // Update the pointer to point to the next element that needs to be mapped,
9300 // and check whether we have mapped all elements.
9301 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9302 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9303 PtrPHI->addIncoming(PtrNext, LastBB);
9304 llvm::Value *IsDone =
9305 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9306 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9307 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9308
9309 MapperCGF.EmitBlock(ExitBB);
9310 // Emit array deletion if this is an array section and \p MapType indicates
9311 // that deletion is required.
9312 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9313 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9314
9315 // Emit the function exit block.
9316 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9317 MapperCGF.FinishFunction();
9318 UDMMap.try_emplace(D, Fn);
9319 if (CGF) {
9320 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9321 Decls.second.push_back(D);
9322 }
9323}
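// A declaration that reaches this point looks like the following (a hedged
// sketch; names are assumptions for illustration):
// \code
// struct Vec { int Len; double *Data; };
// #pragma omp declare mapper(vmap : Vec V) map(V, V.Data[0 : V.Len])
// \endcode
// For it, the emitted .omp_mapper.* function loops over each 'Vec' element
// and pushes one runtime component per map-clause component, applying the
// map-type decay rules shown in the table above.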
9324
9325/// Emit the array initialization or deletion portion for user-defined mapper
9326/// code generation. First, it evaluates whether an array section is mapped and
9327/// whether the \a MapType instructs to delete this section. If \a IsInit is
9328/// true, and \a MapType indicates to not delete this array, array
9329/// initialization code is generated. If \a IsInit is false, and \a MapType
9330/// indicates to delete this array, array deletion code is generated.
9331void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9332 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9333 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9334 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9335 bool IsInit) {
9336 StringRef Prefix = IsInit ? ".init" : ".del";
9337
9338 // Evaluate if this is an array section.
9339 llvm::BasicBlock *BodyBB =
9340 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9341 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9342 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9343 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9344 MapType,
9345 MapperCGF.Builder.getInt64(
9346 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9347 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9348 llvm::Value *DeleteCond;
9349 llvm::Value *Cond;
9350 if (IsInit) {
9351 // base != begin?
9352 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9353 // IsPtrAndObj?
9354 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9355 MapType,
9356 MapperCGF.Builder.getInt64(
9357 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9358 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9359 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9360 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9361 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9362 DeleteCond = MapperCGF.Builder.CreateIsNull(
9363 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9364 } else {
9365 Cond = IsArray;
9366 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9367 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9368 }
9369 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9370 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9371
9372 MapperCGF.EmitBlock(BodyBB);
9373 // Get the array size by multiplying element size and element number (i.e., \p
9374 // Size).
9375 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9376 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9377 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
9378 // memory allocation/deletion purpose only.
9379 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9380 MapType,
9381 MapperCGF.Builder.getInt64(
9382 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9383 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9384 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9385 MapTypeArg = MapperCGF.Builder.CreateOr(
9386 MapTypeArg,
9387 MapperCGF.Builder.getInt64(
9388 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9389 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9390
9391 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9392 // data structure.
9393 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9394 ArraySize, MapTypeArg, MapName};
9395 MapperCGF.EmitRuntimeCall(
9396 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9397 OMPRTL___tgt_push_mapper_component),
9398 OffloadingArgs);
9399}
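// Note: this helper is instantiated twice per mapper (see the calls in
// emitUserDefinedMapper above), once ahead of the element loop with
// IsInit=true and once after it with IsInit=false, so allocation and deletion
// share the same array-section and delete-bit guard logic.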
9400
9401llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9402 const OMPDeclareMapperDecl *D) {
9403 auto I = UDMMap.find(D);
9404 if (I != UDMMap.end())
9405 return I->second;
9406 emitUserDefinedMapper(D);
9407 return UDMMap.lookup(D);
9408}
9409
9410llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9411 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9412 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9413 const OMPLoopDirective &D)>
9414 SizeEmitter) {
9415 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9416 const OMPExecutableDirective *TD = &D;
9417 // Get nested teams distribute kind directive, if any. For now, treat
9418 // 'target_teams_loop' as if it's really a target_teams_distribute.
9419 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9420 Kind != OMPD_target_teams_loop)
9421 TD = getNestedDistributeDirective(CGM.getContext(), D);
9422 if (!TD)
9423 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9424
9425 const auto *LD = cast<OMPLoopDirective>(TD);
9426 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9427 return NumIterations;
9428 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9429}
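// Illustrative result (assumed example): for
// \code
// #pragma omp target teams distribute
// for (int I = 0; I < 1024; ++I)
//   Work(I);
// \endcode
// SizeEmitter produces the value 1024 for the runtime's grid-size heuristics;
// when no suitable nested loop directive exists, the constant 0 is returned.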
9430
9431static void
9432emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9433 const OMPExecutableDirective &D,
9434 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9435 bool RequiresOuterTask, const CapturedStmt &CS,
9436 bool OffloadingMandatory, CodeGenFunction &CGF) {
9437 if (OffloadingMandatory) {
9438 CGF.Builder.CreateUnreachable();
9439 } else {
9440 if (RequiresOuterTask) {
9441 CapturedVars.clear();
9442 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9443 }
9444 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9445 CapturedVars);
9446 }
9447}
9448
9449static llvm::Value *emitDeviceID(
9450 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9451 CodeGenFunction &CGF) {
9452 // Emit device ID if any.
9453 llvm::Value *DeviceID;
9454 if (Device.getPointer()) {
9455 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9456 Device.getInt() == OMPC_DEVICE_device_num) &&
9457 "Expected device_num modifier.");
9458 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9459 DeviceID =
9460 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9461 } else {
9462 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9463 }
9464 return DeviceID;
9465}
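// For example (sketch): 'device(2)' yields an i64 constant 2 here, whereas a
// target construct without a device clause passes OMP_DEVICEID_UNDEF so the
// runtime picks the default device.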
9466
9467static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9468 CodeGenFunction &CGF) {
9469 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9470
9471 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9472 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9473 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9474 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9475 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9476 /*isSigned=*/false);
9477 }
9478 return DynCGroupMem;
9479}
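// For example (sketch): '#pragma omp target ompx_dyn_cgroup_mem(Bytes)'
// makes this helper emit the evaluated 'Bytes' as the i32 dynamic
// group-memory kernel argument; without the clause the runtime sees 0.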
9480static void genMapInfoForCaptures(
9481 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9482 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9483 llvm::OpenMPIRBuilder &OMPBuilder,
9484 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
9485 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9486
9487 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9488 auto RI = CS.getCapturedRecordDecl()->field_begin();
9489 auto *CV = CapturedVars.begin();
9490 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9491 CE = CS.capture_end();
9492 CI != CE; ++CI, ++RI, ++CV) {
9493 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9494 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9495
9496 // VLA sizes are passed to the outlined region by copy and do not have map
9497 // information associated.
9498 if (CI->capturesVariableArrayType()) {
9499 CurInfo.Exprs.push_back(nullptr);
9500 CurInfo.BasePointers.push_back(*CV);
9501 CurInfo.DevicePtrDecls.push_back(nullptr);
9502 CurInfo.DevicePointers.push_back(
9503 MappableExprsHandler::DeviceInfoTy::None);
9504 CurInfo.Pointers.push_back(*CV);
9505 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9506 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9507 // Copy to the device as an argument. No need to retrieve it.
9508 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9509 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9510 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9511 CurInfo.Mappers.push_back(nullptr);
9512 } else {
9513 // If we have any information in the map clause, we use it, otherwise we
9514 // just do a default mapping.
9515 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
9516 if (!CI->capturesThis())
9517 MappedVarSet.insert(CI->getCapturedVar());
9518 else
9519 MappedVarSet.insert(nullptr);
9520 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
9521 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9522 // Generate correct mapping for variables captured by reference in
9523 // lambdas.
9524 if (CI->capturesVariable())
9525 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9526 CurInfo, LambdaPointers);
9527 }
9528 // We expect to have at least an element of information for this capture.
9529 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9530 "Non-existing map pointer for capture!");
9531 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9532 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9533 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9534 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9535 "Inconsistent map information sizes!");
9536
9537 // If there is an entry in PartialStruct it means we have a struct with
9538 // individual members mapped. Emit an extra combined entry.
9539 if (PartialStruct.Base.isValid()) {
9540 CombinedInfo.append(PartialStruct.PreliminaryMapData);
9541 MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
9542 CI->capturesThis(), OMPBuilder, nullptr,
9543 /*NotTargetParams*/ false);
9544 }
9545
9546 // We need to append the results of this capture to what we already have.
9547 CombinedInfo.append(CurInfo);
9548 }
9549 // Adjust MEMBER_OF flags for the lambdas captures.
9550 MEHandler.adjustMemberOfForLambdaCaptures(
9551 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9552 CombinedInfo.Pointers, CombinedInfo.Types);
9553}
9554static void
9555genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9556 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9557 llvm::OpenMPIRBuilder &OMPBuilder,
9558 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9559 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9560
9561 CodeGenModule &CGM = CGF.CGM;
9562 // Map any list items in a map clause that were not captured, because they
9563 // weren't referenced within the construct.
9564 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
9565
9566 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9567 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9568 };
9569 if (CGM.getCodeGenOpts().getDebugInfo() !=
9570 llvm::codegenoptions::NoDebugInfo) {
9571 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9572 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9573 FillInfoMap);
9574 }
9575}
9576
9577static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9578 const CapturedStmt &CS,
9579 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9580 llvm::OpenMPIRBuilder &OMPBuilder,
9581 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9582 // Get mappable expression information.
9583 MappableExprsHandler MEHandler(D, CGF);
9584 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9585
9586 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9587 MappedVarSet, CombinedInfo);
9588 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
9589}
9590
9591template <typename ClauseTy>
9592static void
9593emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9594 const OMPExecutableDirective &D,
9595 llvm::SmallVectorImpl<llvm::Value *> &Values) {
9596 const auto *C = D.getSingleClause<ClauseTy>();
9597 assert(!C->varlist_empty() &&
9598 "ompx_bare requires explicit num_teams and thread_limit");
9599 CodeGenFunction::RunCleanupsScope Scope(CGF);
9600 for (auto *E : C->varlist()) {
9601 llvm::Value *V = CGF.EmitScalarExpr(E);
9602 Values.push_back(
9603 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
9604 }
9605}
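// Illustrative use (assumed example): with
// \code
// #pragma omp target teams ompx_bare num_teams(8) thread_limit(128)
// \endcode
// the two instantiations in emitTargetCallKernelLaunch collect {8} and {128},
// which is why the assertion above requires explicit num_teams and
// thread_limit values.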
9606
9607static void emitTargetCallKernelLaunch(
9608 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9609 const OMPExecutableDirective &D,
9610 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9611 const CapturedStmt &CS, bool OffloadingMandatory,
9612 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9613 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9614 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9615 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9616 const OMPLoopDirective &D)>
9617 SizeEmitter,
9618 CodeGenFunction &CGF, CodeGenModule &CGM) {
9619 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9620
9621 // Fill up the arrays with all the captured variables.
9622 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9623 CGOpenMPRuntime::TargetDataInfo Info;
9624 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
9625
9626 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
9627 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
9628
9629 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9630 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9631 CGF.VoidPtrTy, CGM.getPointerAlign());
9632 InputInfo.PointersArray =
9633 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9634 InputInfo.SizesArray =
9635 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9636 InputInfo.MappersArray =
9637 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9638 MapTypesArray = Info.RTArgs.MapTypesArray;
9639 MapNamesArray = Info.RTArgs.MapNamesArray;
9640
9641 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9642 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9643 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9644 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9645 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9646
9647 if (IsReverseOffloading) {
9648 // Reverse offloading is not supported, so just execute on the host.
9649 // FIXME: This fallback solution is incorrect since it ignores the
9650 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9651 // assert here and ensure SEMA emits an error.
9652 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9653 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9654 return;
9655 }
9656
9657 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9658 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9659
9660 llvm::Value *BasePointersArray =
9661 InputInfo.BasePointersArray.emitRawPointer(CGF);
9662 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9663 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9664 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9665
9666 auto &&EmitTargetCallFallbackCB =
9667 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9668 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9669 -> llvm::OpenMPIRBuilder::InsertPointTy {
9670 CGF.Builder.restoreIP(IP);
9671 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9672 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9673 return CGF.Builder.saveIP();
9674 };
9675
9676 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
9677 SmallVector<llvm::Value *, 3> NumTeams;
9678 SmallVector<llvm::Value *, 3> NumThreads;
9679 if (IsBare) {
9680 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
9681 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
9682 NumThreads);
9683 } else {
9684 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
9685 NumThreads.push_back(
9686 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
9687 }
9688
9689 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9690 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9691 llvm::Value *NumIterations =
9692 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9693 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9694 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9695 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9696
9697 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9698 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9699 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9700
9701 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9702 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9703 DynCGGroupMem, HasNoWait);
9704
9705 CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9706 CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
9707 DeviceID, RTLoc, AllocaIP));
9708 };
9709
9710 if (RequiresOuterTask)
9711 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9712 else
9713 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9714}
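// Note (hedged summary of the flow above): EmitTargetCallFallbackCB wires the
// same host fallback into emitKernelLaunch, so a device image that fails to
// load at runtime degrades to the host-outlined function, with the captured
// variables regenerated when an outer task is required.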
9715
9716static void
9717emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9718 const OMPExecutableDirective &D,
9719 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9720 bool RequiresOuterTask, const CapturedStmt &CS,
9721 bool OffloadingMandatory, CodeGenFunction &CGF) {
9722
9723 // Notify that the host version must be executed.
9724 auto &&ElseGen =
9725 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9726 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9727 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9728 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9729 };
9730
9731 if (RequiresOuterTask) {
9732 CodeGenFunction::OMPTargetDataInfo InputInfo;
9733 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9734 } else {
9735 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9736 }
9737}
9738
9739void CGOpenMPRuntime::emitTargetCall(
9740 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9741 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9742 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9743 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9744 const OMPLoopDirective &D)>
9745 SizeEmitter) {
9746 if (!CGF.HaveInsertPoint())
9747 return;
9748
9749 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9750 CGM.getLangOpts().OpenMPOffloadMandatory;
9751
9752 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9753
9754 const bool RequiresOuterTask =
9755 D.hasClausesOfKind<OMPDependClause>() ||
9756 D.hasClausesOfKind<OMPNowaitClause>() ||
9757 D.hasClausesOfKind<OMPInReductionClause>() ||
9758 (CGM.getLangOpts().OpenMP >= 51 &&
9759 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9760 D.hasClausesOfKind<OMPThreadLimitClause>());
9761 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9762 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9763 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9764 PrePostActionTy &) {
9765 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9766 };
9767 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9768
9769 CodeGenFunction::OMPTargetDataInfo InputInfo;
9770 llvm::Value *MapTypesArray = nullptr;
9771 llvm::Value *MapNamesArray = nullptr;
9772
9773 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9774 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9775 OutlinedFnID, &InputInfo, &MapTypesArray,
9776 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9777 PrePostActionTy &) {
9778 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9779 RequiresOuterTask, CS, OffloadingMandatory,
9780 Device, OutlinedFnID, InputInfo, MapTypesArray,
9781 MapNamesArray, SizeEmitter, CGF, CGM);
9782 };
9783
9784 auto &&TargetElseGen =
9785 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9786 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9787 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9788 CS, OffloadingMandatory, CGF);
9789 };
9790
9791 // If we have a target function ID, it means that we need to support
9792 // offloading; otherwise, we just execute on the host. We need to execute on
9793 // the host regardless of the if-clause condition if, e.g., the user does not
9794 // specify any target triples.
9795 if (OutlinedFnID) {
9796 if (IfCond) {
9797 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9798 } else {
9799 RegionCodeGenTy ThenRCG(TargetThenGen);
9800 ThenRCG(CGF);
9801 }
9802 } else {
9803 RegionCodeGenTy ElseRCG(TargetElseGen);
9804 ElseRCG(CGF);
9805 }
9806}
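// Putting the pieces together (illustrative example, not from the original
// file): for
// \code
// #pragma omp target if(UseDev) device(1) map(tofrom : A[0:N]) nowait
//   Compute(A, N);
// \endcode
// the 'if' clause selects between TargetThenGen and TargetElseGen, 'nowait'
// makes RequiresOuterTask true so the launch is wrapped in a task, and
// 'device(1)' flows through emitDeviceID into the kernel-launch arguments.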
9807
9808void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9809 StringRef ParentName) {
9810 if (!S)
9811 return;
9812
9813 // Codegen OMP target directives that offload compute to the device.
9814 bool RequiresDeviceCodegen =
9815 isa<OMPExecutableDirective>(S) &&
9816 isOpenMPTargetExecutionDirective(
9817 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9818
9819 if (RequiresDeviceCodegen) {
9820 const auto &E = *cast<OMPExecutableDirective>(S);
9821
9822 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9823 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9824
9825 // Is this a target region that should not be emitted as an entry point? If
9826 // so just signal we are done with this target region.
9827 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9828 return;
9829
9830 switch (E.getDirectiveKind()) {
9831 case OMPD_target:
9832 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9833 cast<OMPTargetDirective>(E));
9834 break;
9835 case OMPD_target_parallel:
9836 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9837 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9838 break;
9839 case OMPD_target_teams:
9840 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9841 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9842 break;
9843 case OMPD_target_teams_distribute:
9844 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9845 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9846 break;
9847 case OMPD_target_teams_distribute_simd:
9848 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9849 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9850 break;
9851 case OMPD_target_parallel_for:
9852 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9853 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9854 break;
9855 case OMPD_target_parallel_for_simd:
9856 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9857 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9858 break;
9859 case OMPD_target_simd:
9860 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9861 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9862 break;
9863 case OMPD_target_teams_distribute_parallel_for:
9864 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9865 CGM, ParentName,
9866 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9867 break;
9868 case OMPD_target_teams_distribute_parallel_for_simd:
9869 CodeGenFunction::
9870 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9871 CGM, ParentName,
9872 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9873 break;
9874 case OMPD_target_teams_loop:
9875 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9876 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
9877 break;
9878 case OMPD_target_parallel_loop:
9879 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9880 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
9881 break;
9882 case OMPD_parallel:
9883 case OMPD_for:
9884 case OMPD_parallel_for:
9885 case OMPD_parallel_master:
9886 case OMPD_parallel_sections:
9887 case OMPD_for_simd:
9888 case OMPD_parallel_for_simd:
9889 case OMPD_cancel:
9890 case OMPD_cancellation_point:
9891 case OMPD_ordered:
9892 case OMPD_threadprivate:
9893 case OMPD_allocate:
9894 case OMPD_task:
9895 case OMPD_simd:
9896 case OMPD_tile:
9897 case OMPD_unroll:
9898 case OMPD_sections:
9899 case OMPD_section:
9900 case OMPD_single:
9901 case OMPD_master:
9902 case OMPD_critical:
9903 case OMPD_taskyield:
9904 case OMPD_barrier:
9905 case OMPD_taskwait:
9906 case OMPD_taskgroup:
9907 case OMPD_atomic:
9908 case OMPD_flush:
9909 case OMPD_depobj:
9910 case OMPD_scan:
9911 case OMPD_teams:
9912 case OMPD_target_data:
9913 case OMPD_target_exit_data:
9914 case OMPD_target_enter_data:
9915 case OMPD_distribute:
9916 case OMPD_distribute_simd:
9917 case OMPD_distribute_parallel_for:
9918 case OMPD_distribute_parallel_for_simd:
9919 case OMPD_teams_distribute:
9920 case OMPD_teams_distribute_simd:
9921 case OMPD_teams_distribute_parallel_for:
9922 case OMPD_teams_distribute_parallel_for_simd:
9923 case OMPD_target_update:
9924 case OMPD_declare_simd:
9925 case OMPD_declare_variant:
9926 case OMPD_begin_declare_variant:
9927 case OMPD_end_declare_variant:
9928 case OMPD_declare_target:
9929 case OMPD_end_declare_target:
9930 case OMPD_declare_reduction:
9931 case OMPD_declare_mapper:
9932 case OMPD_taskloop:
9933 case OMPD_taskloop_simd:
9934 case OMPD_master_taskloop:
9935 case OMPD_master_taskloop_simd:
9936 case OMPD_parallel_master_taskloop:
9937 case OMPD_parallel_master_taskloop_simd:
9938 case OMPD_requires:
9939 case OMPD_metadirective:
9940 case OMPD_unknown:
9941 default:
9942 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9943 }
9944 return;
9945 }
9946
9947 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9948 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9949 return;
9950
9951 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
9952 return;
9953 }
9954
9955 // If this is a lambda function, look into its body.
9956 if (const auto *L = dyn_cast<LambdaExpr>(S))
9957 S = L->getBody();
9958
9959 // Keep looking for target regions recursively.
9960 for (const Stmt *II : S->children())
9961 scanForTargetRegionsFunctions(II, ParentName);
9962}
9963
9964static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9965 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9966 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9967 if (!DevTy)
9968 return false;
9969 // Do not emit device_type(nohost) functions for the host.
9970 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9971 return true;
9972 // Do not emit device_type(host) functions for the device.
9973 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9974 return true;
9975 return false;
9976}
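// For example (sketch):
// \code
// #pragma omp begin declare target device_type(nohost)
// void DeviceOnly();
// #pragma omp end declare target
// \endcode
// 'DeviceOnly' is skipped when emitting host code, and device_type(host)
// functions are likewise skipped when emitting device code.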
9977
9978bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9979 // If emitting code for the host, we do not process FD here. Instead we do
9980 // the normal code generation.
9981 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
9982 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
9983 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9984 CGM.getLangOpts().OpenMPIsTargetDevice))
9985 return true;
9986 return false;
9987 }
9988
9989 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9990 // Try to detect target regions in the function.
9991 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
9992 StringRef Name = CGM.getMangledName(GD);
9993 scanForTargetRegionsFunctions(FD->getBody(), Name);
9994 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
9995 CGM.getLangOpts().OpenMPIsTargetDevice))
9996 return true;
9997 }
9998
9999 // Do not emit the function if it is not marked as declare target.
10000 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10001 AlreadyEmittedTargetDecls.count(VD) == 0;
10002}
10003
10004bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10005 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10006 CGM.getLangOpts().OpenMPIsTargetDevice))
10007 return true;
10008
10009 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
10010 return false;
10011
10012 // Check if there are Ctors/Dtors in this declaration and look for target
10013 // regions in it. We use the complete variant to produce the kernel name
10014 // mangling.
10015 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10016 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10017 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10018 StringRef ParentName =
10019 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10020 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10021 }
10022 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10023 StringRef ParentName =
10024 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10025 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10026 }
10027 }
10028
10029 // Do not emit the variable if it is not marked as declare target.
10030 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10031 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10032 cast<VarDecl>(GD.getDecl()));
10033 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10034 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10035 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10036 HasRequiresUnifiedSharedMemory)) {
10037 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10038 return true;
10039 }
10040 return false;
10041}
10042
10043void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10044 llvm::Constant *Addr) {
10045 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10046 !CGM.getLangOpts().OpenMPIsTargetDevice)
10047 return;
10048
10049 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10050 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10051
10052 // If this is an 'extern' declaration we defer to the canonical definition and
10053 // do not emit an offloading entry.
10054 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10055 VD->hasExternalStorage())
10056 return;
10057
10058 if (!Res) {
10059 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10060 // Register non-target variables being emitted in device code (debug info
10061 // may cause this).
10062 StringRef VarName = CGM.getMangledName(VD);
10063 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10064 }
10065 return;
10066 }
10067
10068 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10069 auto LinkageForVariable = [&VD, this]() {
10070 return CGM.getLLVMLinkageVarDefinition(VD);
10071 };
10072
10073 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10074 OMPBuilder.registerTargetGlobalVariable(
10075 convertCaptureClause(VD), convertDeviceClause(VD),
10076 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10077 VD->isExternallyVisible(),
10078 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10079 VD->getCanonicalDecl()->getBeginLoc()),
10080 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10081 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10082 CGM.getTypes().ConvertTypeForMem(
10083 CGM.getContext().getPointerType(VD->getType())),
10084 Addr);
10085
10086 for (auto *ref : GeneratedRefs)
10087 CGM.addCompilerUsedGlobal(ref);
10088}
10089
10090bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10091 if (isa<FunctionDecl>(GD.getDecl()) ||
10092 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10093 return emitTargetFunctions(GD);
10094
10095 return emitTargetGlobalVariable(GD);
10096}
10097
10098void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10099 for (const VarDecl *VD : DeferredGlobalVariables) {
10100 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10101 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10102 if (!Res)
10103 continue;
10104 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10105 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10106 !HasRequiresUnifiedSharedMemory) {
10107 CGM.EmitGlobal(VD);
10108 } else {
10109 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10110 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10111 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10112 HasRequiresUnifiedSharedMemory)) &&
10113 "Expected link clause or to clause with unified memory.");
10114 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10115 }
10116 }
10117}
10118
10119void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10120 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10121 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10122 " Expected target-based directive.");
10123}
10124
10125void CGOpenMPRuntime::processRequiresDecl(const OMPRequiresDecl *D) {
10126 for (const OMPClause *Clause : D->clauselists()) {
10127 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10128 HasRequiresUnifiedSharedMemory = true;
10129 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10130 } else if (const auto *AC =
10131 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10132 switch (AC->getAtomicDefaultMemOrderKind()) {
10133 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10134 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10135 break;
10136 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10137 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10138 break;
10139 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10140 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10141 break;
10142 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10143 break;
10144 }
10145 }
10146 }
10147}
10148
10149llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10150 return RequiresAtomicOrdering;
10151}
10152
10153bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10154 LangAS &AS) {
10155 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10156 return false;
10157 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10158 switch (A->getAllocatorType()) {
10159 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10160 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10161 // Not supported, fallback to the default mem space.
10162 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10163 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10164 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10165 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10166 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10167 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10168 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10169 AS = LangAS::Default;
10170 return true;
10171 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10172 llvm_unreachable("Expected predefined allocator for the variables with the "
10173 "static storage.");
10174 }
10175 return false;
10176}
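// Illustrative trigger (assumed example): a global declared as
// \code
// static int Buf[64];
// #pragma omp allocate(Buf) allocator(omp_high_bw_mem_alloc)
// \endcode
// reaches the OMPHighBWMemAlloc case above and reports LangAS::Default;
// targets may refine this hook to map allocators to real address spaces.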
10177
10178bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10179 return HasRequiresUnifiedSharedMemory;
10180}
10181
10182CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10183 CodeGenModule &CGM)
10183 CodeGenModule &CGM)
10184 : CGM(CGM) {
10185 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10186 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10187 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10188 }
10189}
10190
10191CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10192 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10193 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10194}
10195
10196bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10197 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10198 return true;
10199
10200 const auto *D = cast<FunctionDecl>(GD.getDecl());
10201 // Do not emit the function if it is marked as declare target, as it was
10202 // already emitted.
10203 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10204 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10205 if (auto *F = dyn_cast_or_null<llvm::Function>(
10206 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10207 return !F->isDeclaration();
10208 return false;
10209 }
10210 return true;
10211 }
10212
10213 return !AlreadyEmittedTargetDecls.insert(D).second;
10214}
10215
10216void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10217 const OMPExecutableDirective &D,
10218 SourceLocation Loc,
10219 llvm::Function *OutlinedFn,
10220 ArrayRef<llvm::Value *> CapturedVars) {
10221 if (!CGF.HaveInsertPoint())
10222 return;
10223
10224 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10225 CodeGenFunction::RunCleanupsScope Scope(CGF);
10226
10227 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10228 llvm::Value *Args[] = {
10229 RTLoc,
10230 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10231 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10232 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10233 RealArgs.append(std::begin(Args), std::end(Args));
10234 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10235
10236 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10237 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10238 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10239}
10240
10241void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10242 const Expr *NumTeams,
10243 const Expr *ThreadLimit,
10244 SourceLocation Loc) {
10245 if (!CGF.HaveInsertPoint())
10246 return;
10247
10248 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10249
10250 llvm::Value *NumTeamsVal =
10251 NumTeams
10252 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10253 CGF.CGM.Int32Ty, /* isSigned = */ true)
10254 : CGF.Builder.getInt32(0);
10255
10256 llvm::Value *ThreadLimitVal =
10257 ThreadLimit
10258 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10259 CGF.CGM.Int32Ty, /* isSigned = */ true)
10260 : CGF.Builder.getInt32(0);
10261
10262 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10263 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10264 ThreadLimitVal};
10265 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10266 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10267 PushNumTeamsArgs);
10268}
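// For example (sketch): '#pragma omp teams num_teams(4) thread_limit(64)'
// lowers to __kmpc_push_num_teams(&loc, tid, 4, 64) before the teams region
// is forked.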
10269
10270void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10271 const Expr *ThreadLimit,
10272 SourceLocation Loc) {
10273 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10274 llvm::Value *ThreadLimitVal =
10275 ThreadLimit
10276 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10277 CGF.CGM.Int32Ty, /* isSigned = */ true)
10278 : CGF.Builder.getInt32(0);
10279
10280 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10281 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10282 ThreadLimitVal};
10283 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10284 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10285 ThreadLimitArgs);
10286}
10287
10288void CGOpenMPRuntime::emitTargetDataCalls(
10289 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10290 const Expr *Device, const RegionCodeGenTy &CodeGen,
10291 CGOpenMPRuntime::TargetDataInfo &Info) {
10292 if (!CGF.HaveInsertPoint())
10293 return;
10294
10295 // Action used to replace the default codegen action and turn privatization
10296 // off.
10297 PrePostActionTy NoPrivAction;
10298
10299 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10300
10301 llvm::Value *IfCondVal = nullptr;
10302 if (IfCond)
10303 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10304
10305 // Emit device ID if any.
10306 llvm::Value *DeviceID = nullptr;
10307 if (Device) {
10308 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10309 CGF.Int64Ty, /*isSigned=*/true);
10310 } else {
10311 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10312 }
10313
10314 // Fill up the arrays with all the mapped variables.
10315 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10316 auto GenMapInfoCB =
10317 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10318 CGF.Builder.restoreIP(CodeGenIP);
10319 // Get map clause information.
10320 MappableExprsHandler MEHandler(D, CGF);
10321 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10322
10323 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10324 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10325 };
10326 if (CGM.getCodeGenOpts().getDebugInfo() !=
10327 llvm::codegenoptions::NoDebugInfo) {
10328 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10329 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10330 FillInfoMap);
10331 }
10332
10333 return CombinedInfo;
10334 };
10335 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10336 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10337 CGF.Builder.restoreIP(CodeGenIP);
10338 switch (BodyGenType) {
10339 case BodyGenTy::Priv:
10340 if (!Info.CaptureDeviceAddrMap.empty())
10341 CodeGen(CGF);
10342 break;
10343 case BodyGenTy::DupNoPriv:
10344 if (!Info.CaptureDeviceAddrMap.empty()) {
10345 CodeGen.setAction(NoPrivAction);
10346 CodeGen(CGF);
10347 }
10348 break;
10349 case BodyGenTy::NoPriv:
10350 if (Info.CaptureDeviceAddrMap.empty()) {
10351 CodeGen.setAction(NoPrivAction);
10352 CodeGen(CGF);
10353 }
10354 break;
10355 }
10356 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10357 CGF.Builder.GetInsertPoint());
10358 };
10359
10360 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10361 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10362 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10363 }
10364 };
10365
10366 auto CustomMapperCB = [&](unsigned int I) {
10367 llvm::Value *MFunc = nullptr;
10368 if (CombinedInfo.Mappers[I]) {
10369 Info.HasMapper = true;
10370 MFunc = getOrCreateUserDefinedMapperFunc(
10371 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10372 }
10373 return MFunc;
10374 };
10375
10376 // Source location for the ident struct
10377 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10378
10379 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10380 CGF.AllocaInsertPt->getIterator());
10381 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10382 CGF.Builder.GetInsertPoint());
10383 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10384 CGF.Builder.restoreIP(OMPBuilder.createTargetData(
10385 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10386 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
10387}
10388
10389void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10390 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10391 const Expr *Device) {
10392 if (!CGF.HaveInsertPoint())
10393 return;
10394
10395 assert((isa<OMPTargetEnterDataDirective>(D) ||
10396 isa<OMPTargetExitDataDirective>(D) ||
10397 isa<OMPTargetUpdateDirective>(D)) &&
10398 "Expecting either target enter, exit data, or update directives.");
10399
10400 CodeGenFunction::OMPTargetDataInfo InputInfo;
10401 llvm::Value *MapTypesArray = nullptr;
10402 llvm::Value *MapNamesArray = nullptr;
10403 // Generate the code for the opening of the data environment.
10404 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10405 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10406 // Emit device ID if any.
10407 llvm::Value *DeviceID = nullptr;
10408 if (Device) {
10409 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10410 CGF.Int64Ty, /*isSigned=*/true);
10411 } else {
10412 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10413 }
10414
10415 // Emit the number of elements in the offloading arrays.
10416 llvm::Constant *PointerNum =
10417 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10418
10419 // Source location for the ident struct
10420 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10421
10422 SmallVector<llvm::Value *, 13> OffloadingArgs(
10423 {RTLoc, DeviceID, PointerNum,
10424 InputInfo.BasePointersArray.emitRawPointer(CGF),
10425 InputInfo.PointersArray.emitRawPointer(CGF),
10426 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10427 InputInfo.MappersArray.emitRawPointer(CGF)});
10428
10429 // Select the right runtime function call for each standalone
10430 // directive.
10431 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10432 RuntimeFunction RTLFn;
10433 switch (D.getDirectiveKind()) {
10434 case OMPD_target_enter_data:
10435 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10436 : OMPRTL___tgt_target_data_begin_mapper;
10437 break;
10438 case OMPD_target_exit_data:
10439 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10440 : OMPRTL___tgt_target_data_end_mapper;
10441 break;
10442 case OMPD_target_update:
10443 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10444 : OMPRTL___tgt_target_data_update_mapper;
10445 break;
10446 case OMPD_parallel:
10447 case OMPD_for:
10448 case OMPD_parallel_for:
10449 case OMPD_parallel_master:
10450 case OMPD_parallel_sections:
10451 case OMPD_for_simd:
10452 case OMPD_parallel_for_simd:
10453 case OMPD_cancel:
10454 case OMPD_cancellation_point:
10455 case OMPD_ordered:
10456 case OMPD_threadprivate:
10457 case OMPD_allocate:
10458 case OMPD_task:
10459 case OMPD_simd:
10460 case OMPD_tile:
10461 case OMPD_unroll:
10462 case OMPD_sections:
10463 case OMPD_section:
10464 case OMPD_single:
10465 case OMPD_master:
10466 case OMPD_critical:
10467 case OMPD_taskyield:
10468 case OMPD_barrier:
10469 case OMPD_taskwait:
10470 case OMPD_taskgroup:
10471 case OMPD_atomic:
10472 case OMPD_flush:
10473 case OMPD_depobj:
10474 case OMPD_scan:
10475 case OMPD_teams:
10476 case OMPD_target_data:
10477 case OMPD_distribute:
10478 case OMPD_distribute_simd:
10479 case OMPD_distribute_parallel_for:
10480 case OMPD_distribute_parallel_for_simd:
10481 case OMPD_teams_distribute:
10482 case OMPD_teams_distribute_simd:
10483 case OMPD_teams_distribute_parallel_for:
10484 case OMPD_teams_distribute_parallel_for_simd:
10485 case OMPD_declare_simd:
10486 case OMPD_declare_variant:
10487 case OMPD_begin_declare_variant:
10488 case OMPD_end_declare_variant:
10489 case OMPD_declare_target:
10490 case OMPD_end_declare_target:
10491 case OMPD_declare_reduction:
10492 case OMPD_declare_mapper:
10493 case OMPD_taskloop:
10494 case OMPD_taskloop_simd:
10495 case OMPD_master_taskloop:
10496 case OMPD_master_taskloop_simd:
10497 case OMPD_parallel_master_taskloop:
10498 case OMPD_parallel_master_taskloop_simd:
10499 case OMPD_target:
10500 case OMPD_target_simd:
10501 case OMPD_target_teams_distribute:
10502 case OMPD_target_teams_distribute_simd:
10503 case OMPD_target_teams_distribute_parallel_for:
10504 case OMPD_target_teams_distribute_parallel_for_simd:
10505 case OMPD_target_teams:
10506 case OMPD_target_parallel:
10507 case OMPD_target_parallel_for:
10508 case OMPD_target_parallel_for_simd:
10509 case OMPD_requires:
10510 case OMPD_metadirective:
10511 case OMPD_unknown:
10512 default:
10513 llvm_unreachable("Unexpected standalone target data directive.");
10514 break;
10515 }
10516 if (HasNowait) {
10517 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10518 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10519 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10520 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10521 }
10522 CGF.EmitRuntimeCall(
10523 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10524 OffloadingArgs);
10525 };
10526
10527 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10528 &MapNamesArray](CodeGenFunction &CGF,
10529 PrePostActionTy &) {
10530 // Fill up the arrays with all the mapped variables.
10531 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10532 CGOpenMPRuntime::TargetDataInfo Info;
10533 MappableExprsHandler MEHandler(D, CGF);
10534 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
10535 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10536 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10537
10538 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10539 D.hasClausesOfKind<OMPNowaitClause>();
10540
10541 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10542 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10543 CGF.VoidPtrTy, CGM.getPointerAlign());
10544 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10545 CGM.getPointerAlign());
10546 InputInfo.SizesArray =
10547 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10548 InputInfo.MappersArray =
10549 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10550 MapTypesArray = Info.RTArgs.MapTypesArray;
10551 MapNamesArray = Info.RTArgs.MapNamesArray;
10552 if (RequiresOuterTask)
10553 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10554 else
10555 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10556 };
10557
10558 if (IfCond) {
10559 emitIfClause(CGF, IfCond, TargetThenGen,
10560 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10561 } else {
10562 RegionCodeGenTy ThenRCG(TargetThenGen);
10563 ThenRCG(CGF);
10564 }
10565}
10566
10567namespace {
10568 /// Kind of parameter in a function with 'declare simd' directive.
10569enum ParamKindTy {
10570 Linear,
10571 LinearRef,
10572 LinearUVal,
10573 LinearVal,
10574 Uniform,
10575 Vector,
10576};
10577/// Attribute set of the parameter.
10578struct ParamAttrTy {
10579 ParamKindTy Kind = Vector;
10580 llvm::APSInt StrideOrArg;
10581 llvm::APSInt Alignment;
10582 bool HasVarStride = false;
10583};
10584} // namespace
10585
10586static unsigned evaluateCDTSize(const FunctionDecl *FD,
10587 ArrayRef<ParamAttrTy> ParamAttrs) {
10588 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10589 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10590 // of that clause. The VLEN value must be a power of 2.
10591 // Otherwise the notion of the function's "characteristic data type" (CDT)
10592 // is used to compute the vector length.
10593 // CDT is defined in the following order:
10594 // a) For non-void function, the CDT is the return type.
10595 // b) If the function has any non-uniform, non-linear parameters, then the
10596 // CDT is the type of the first such parameter.
10597 // c) If the CDT determined by a) or b) above is struct, union, or class
10598 // type which is pass-by-value (except for the type that maps to the
10599 // built-in complex data type), the characteristic data type is int.
10600 // d) If none of the above three cases is applicable, the CDT is int.
10601 // The VLEN is then determined based on the CDT and the size of vector
10602 // register of that ISA for which current vector version is generated. The
10603 // VLEN is computed using the formula below:
10604 // VLEN = sizeof(vector_register) / sizeof(CDT),
10605 // where the vector register size is specified in section 3.2.1 (Registers and
10606 // the Stack Frame) of the original AMD64 ABI document.
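// For example, with a 256-bit vector register (AVX2) and a 64-bit CDT such as
// double, VLEN = 256 / 64 = 4.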
10607 QualType RetType = FD->getReturnType();
10608 if (RetType.isNull())
10609 return 0;
10610 ASTContext &C = FD->getASTContext();
10611 QualType CDT;
10612 if (!RetType.isNull() && !RetType->isVoidType()) {
10613 CDT = RetType;
10614 } else {
10615 unsigned Offset = 0;
10616 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10617 if (ParamAttrs[Offset].Kind == Vector)
10618 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10619 ++Offset;
10620 }
10621 if (CDT.isNull()) {
10622 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10623 if (ParamAttrs[I + Offset].Kind == Vector) {
10624 CDT = FD->getParamDecl(I)->getType();
10625 break;
10626 }
10627 }
10628 }
10629 }
10630 if (CDT.isNull())
10631 CDT = C.IntTy;
10632 CDT = CDT->getCanonicalTypeUnqualified();
10633 if (CDT->isRecordType() || CDT->isUnionType())
10634 CDT = C.IntTy;
10635 return C.getTypeSize(CDT);
10636}
10637
10638/// Mangle the parameter part of the vector function name according to the
10639/// parameters' OpenMP classification. The mangling function is defined in
10640/// section 4.5 of the AAVFABI(2021Q1).
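/// For example, parameters classified as (uniform, linear with step 4, vector)
/// mangle to "ul4v".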
10641static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10642 SmallString<256> Buffer;
10643 llvm::raw_svector_ostream Out(Buffer);
10644 for (const auto &ParamAttr : ParamAttrs) {
10645 switch (ParamAttr.Kind) {
10646 case Linear:
10647 Out << 'l';
10648 break;
10649 case LinearRef:
10650 Out << 'R';
10651 break;
10652 case LinearUVal:
10653 Out << 'U';
10654 break;
10655 case LinearVal:
10656 Out << 'L';
10657 break;
10658 case Uniform:
10659 Out << 'u';
10660 break;
10661 case Vector:
10662 Out << 'v';
10663 break;
10664 }
10665 if (ParamAttr.HasVarStride)
10666 Out << "s" << ParamAttr.StrideOrArg;
10667 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10668 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10669 // Don't print the step value if it is not present or if it is
10670 // equal to 1.
10671 if (ParamAttr.StrideOrArg < 0)
10672 Out << 'n' << -ParamAttr.StrideOrArg;
10673 else if (ParamAttr.StrideOrArg != 1)
10674 Out << ParamAttr.StrideOrArg;
10675 }
10676
10677 if (!!ParamAttr.Alignment)
10678 Out << 'a' << ParamAttr.Alignment;
10679 }
10680
10681 return std::string(Out.str());
10682}
10683
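// Adds one "_ZGV..." attribute per ISA/mask combination; e.g. (illustrative,
// for a hypothetical function foo) an unmasked SSE variant with VLEN 4 and two
// vector parameters is recorded as "_ZGVbN4vv_foo".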
10684static void
10685emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10686 const llvm::APSInt &VLENVal,
10687 ArrayRef<ParamAttrTy> ParamAttrs,
10688 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10689 struct ISADataTy {
10690 char ISA;
10691 unsigned VecRegSize;
10692 };
10693 ISADataTy ISAData[] = {
10694 {
10695 'b', 128
10696 }, // SSE
10697 {
10698 'c', 256
10699 }, // AVX
10700 {
10701 'd', 256
10702 }, // AVX2
10703 {
10704 'e', 512
10705 }, // AVX512
10706 };
10707 llvm::SmallVector<char, 2> Masked;
10708 switch (State) {
10709 case OMPDeclareSimdDeclAttr::BS_Undefined:
10710 Masked.push_back('N');
10711 Masked.push_back('M');
10712 break;
10713 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10714 Masked.push_back('N');
10715 break;
10716 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10717 Masked.push_back('M');
10718 break;
10719 }
10720 for (char Mask : Masked) {
10721 for (const ISADataTy &Data : ISAData) {
10722 SmallString<256> Buffer;
10723 llvm::raw_svector_ostream Out(Buffer);
10724 Out << "_ZGV" << Data.ISA << Mask;
10725 if (!VLENVal) {
10726 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10727 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10728 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10729 } else {
10730 Out << VLENVal;
10731 }
10732 Out << mangleVectorParameters(ParamAttrs);
10733 Out << '_' << Fn->getName();
10734 Fn->addFnAttr(Out.str());
10735 }
10736 }
10737}
10738
10739// These are the functions needed to mangle the names of the
10740// vector functions generated by the compiler, according to the rules
10741// defined in the "Vector Function ABI specifications for AArch64",
10742// available at
10743// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10744
10745/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10746static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10747 QT = QT.getCanonicalType();
10748
10749 if (QT->isVoidType())
10750 return false;
10751
10752 if (Kind == ParamKindTy::Uniform)
10753 return false;
10754
10755 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10756 return false;
10757
10758 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10759 !QT->isReferenceType())
10760 return false;
10761
10762 return true;
10763}
10764
10765/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10766static bool getAArch64PBV(QualType QT, ASTContext &C) {
10767 QT = QT.getCanonicalType();
10768 unsigned Size = C.getTypeSize(QT);
10769
10770 // Only scalars and complex types at most 16 bytes wide set PBV to true.
10771 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10772 return false;
10773
10774 if (QT->isFloatingType())
10775 return true;
10776
10777 if (QT->isIntegerType())
10778 return true;
10779
10780 if (QT->isPointerType())
10781 return true;
10782
10783 // TODO: Add support for complex types (section 3.1.2, item 2).
10784
10785 return false;
10786}
10787
10788/// Computes the lane size (LS) of a return type or of an input parameter,
10789/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10790/// TODO: Add support for references, section 3.2.1, item 1.
10791static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10792 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10793 QualType PTy = QT.getCanonicalType()->getPointeeType();
10794 if (getAArch64PBV(PTy, C))
10795 return C.getTypeSize(PTy);
10796 }
10797 if (getAArch64PBV(QT, C))
10798 return C.getTypeSize(QT);
10799
10800 return C.getTypeSize(C.getUIntPtrType());
10801}
10802
10803// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10804// signature of the scalar function, as defined in 3.2.2 of the
10805// AAVFABI.
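// For example (assuming both parameters are classified as vector), a function
// double f(float, short) has lane sizes {64, 32, 16}: NDS = 16 and WDS = 64.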
10806static std::tuple<unsigned, unsigned, bool>
10807getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10808 QualType RetType = FD->getReturnType().getCanonicalType();
10809
10810 ASTContext &C = FD->getASTContext();
10811
10812 bool OutputBecomesInput = false;
10813
10814 llvm::SmallVector<unsigned, 8> Sizes;
10815 if (!RetType->isVoidType()) {
10816 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10817 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10818 OutputBecomesInput = true;
10819 }
10820 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10821 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10822 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10823 }
10824
10825 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10826 // The LS of a function parameter / return value can only be a power
10827 // of 2, starting from 8 bits, up to 128.
10828 assert(llvm::all_of(Sizes,
10829 [](unsigned Size) {
10830 return Size == 8 || Size == 16 || Size == 32 ||
10831 Size == 64 || Size == 128;
10832 }) &&
10833 "Invalid size");
10834
10835 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10836 *std::max_element(std::begin(Sizes), std::end(Sizes)),
10837 OutputBecomesInput);
10838}
10839
10840// Function used to add the attribute. The parameter `VLEN` is
10841// templated to allow the use of "x" when targeting scalable functions
10842// for SVE.
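// e.g., VLEN 2, mask "N", ISA 'n', one vector parameter and no widened return
// produce "_ZGVnN2v_foo" (foo being an illustrative function name).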
10843template <typename T>
10844static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10845 char ISA, StringRef ParSeq,
10846 StringRef MangledName, bool OutputBecomesInput,
10847 llvm::Function *Fn) {
10848 SmallString<256> Buffer;
10849 llvm::raw_svector_ostream Out(Buffer);
10850 Out << Prefix << ISA << LMask << VLEN;
10851 if (OutputBecomesInput)
10852 Out << "v";
10853 Out << ParSeq << "_" << MangledName;
10854 Fn->addFnAttr(Out.str());
10855}
10856
10857// Helper function to generate the Advanced SIMD names depending on
10858// the value of the NDS when simdlen is not present.
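// Per section 3.3.1 of the AAVFABI, each NDS maps to the vector lengths that
// fill a 64-bit and a 128-bit Advanced SIMD register; e.g. NDS == 16 yields
// the 4-lane and 8-lane variants below.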
10859static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10860 StringRef Prefix, char ISA,
10861 StringRef ParSeq, StringRef MangledName,
10862 bool OutputBecomesInput,
10863 llvm::Function *Fn) {
10864 switch (NDS) {
10865 case 8:
10866 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10867 OutputBecomesInput, Fn);
10868 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10869 OutputBecomesInput, Fn);
10870 break;
10871 case 16:
10872 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10873 OutputBecomesInput, Fn);
10874 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10875 OutputBecomesInput, Fn);
10876 break;
10877 case 32:
10878 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10879 OutputBecomesInput, Fn);
10880 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10881 OutputBecomesInput, Fn);
10882 break;
10883 case 64:
10884 case 128:
10885 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10886 OutputBecomesInput, Fn);
10887 break;
10888 default:
10889 llvm_unreachable("Scalar type is too wide.");
10890 }
10891}
10892
10893/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10894static void emitAArch64DeclareSimdFunction(
10895 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10896 ArrayRef<ParamAttrTy> ParamAttrs,
10897 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10898 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10899
10900 // Get basic data for building the vector signature.
10901 const auto Data = getNDSWDS(FD, ParamAttrs);
10902 const unsigned NDS = std::get<0>(Data);
10903 const unsigned WDS = std::get<1>(Data);
10904 const bool OutputBecomesInput = std::get<2>(Data);
10905
10906 // Check the values provided via `simdlen` by the user.
10907 // 1. A `simdlen(1)` doesn't produce vector signatures,
10908 if (UserVLEN == 1) {
10909 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10910 DiagnosticsEngine::Warning,
10911 "The clause simdlen(1) has no effect when targeting aarch64.");
10912 CGM.getDiags().Report(SLoc, DiagID);
10913 return;
10914 }
10915
10916 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10917 // Advanced SIMD output.
10918 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10919 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10920 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10921 "power of 2 when targeting Advanced SIMD.");
10922 CGM.getDiags().Report(SLoc, DiagID);
10923 return;
10924 }
10925
10926 // 3. Section 3.4.1: SVE fixed lengths must obey the architectural
10927 // limits.
10928 if (ISA == 's' && UserVLEN != 0) {
10929 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10930 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10931 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10932 "lanes in the architectural constraints "
10933 "for SVE (min is 128-bit, max is "
10934 "2048-bit, by steps of 128-bit)");
10935 CGM.getDiags().Report(SLoc, DiagID) << WDS;
10936 return;
10937 }
10938 }
10939
10940 // Sort out parameter sequence.
10941 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10942 StringRef Prefix = "_ZGV";
10943 // Generate simdlen from user input (if any).
10944 if (UserVLEN) {
10945 if (ISA == 's') {
10946 // SVE generates only a masked function.
10947 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10948 OutputBecomesInput, Fn);
10949 } else {
10950 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10951 // Advanced SIMD generates one or two functions, depending on
10952 // the `[not]inbranch` clause.
10953 switch (State) {
10954 case OMPDeclareSimdDeclAttr::BS_Undefined:
10955 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10956 OutputBecomesInput, Fn);
10957 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10958 OutputBecomesInput, Fn);
10959 break;
10960 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10961 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10962 OutputBecomesInput, Fn);
10963 break;
10964 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10965 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10966 OutputBecomesInput, Fn);
10967 break;
10968 }
10969 }
10970 } else {
10971 // If no user simdlen is provided, follow the AAVFABI rules for
10972 // generating the vector length.
10973 if (ISA == 's') {
10974 // SVE, section 3.4.1, item 1.
10975 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10976 OutputBecomesInput, Fn);
10977 } else {
10978 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10979 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10980 // two vector names depending on the use of the clause
10981 // `[not]inbranch`.
10982 switch (State) {
10983 case OMPDeclareSimdDeclAttr::BS_Undefined:
10984 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10985 OutputBecomesInput, Fn);
10986 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10987 OutputBecomesInput, Fn);
10988 break;
10989 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10990 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10991 OutputBecomesInput, Fn);
10992 break;
10993 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10994 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10995 OutputBecomesInput, Fn);
10996 break;
10997 }
10998 }
10999 }
11000}
11001
11002void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11003 llvm::Function *Fn) {
11004 ASTContext &C = CGM.getContext();
11005 FD = FD->getMostRecentDecl();
11006 while (FD) {
11007 // Map params to their positions in function decl.
11008 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11009 if (isa<CXXMethodDecl>(FD))
11010 ParamPositions.try_emplace(FD, 0);
11011 unsigned ParamPos = ParamPositions.size();
11012 for (const ParmVarDecl *P : FD->parameters()) {
11013 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11014 ++ParamPos;
11015 }
11016 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11017 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11018 // Mark uniform parameters.
11019 for (const Expr *E : Attr->uniforms()) {
11020 E = E->IgnoreParenImpCasts();
11021 unsigned Pos;
11022 if (isa<CXXThisExpr>(E)) {
11023 Pos = ParamPositions[FD];
11024 } else {
11025 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11026 ->getCanonicalDecl();
11027 auto It = ParamPositions.find(PVD);
11028 assert(It != ParamPositions.end() && "Function parameter not found");
11029 Pos = It->second;
11030 }
11031 ParamAttrs[Pos].Kind = Uniform;
11032 }
11033 // Get alignment info.
11034 auto *NI = Attr->alignments_begin();
11035 for (const Expr *E : Attr->aligneds()) {
11036 E = E->IgnoreParenImpCasts();
11037 unsigned Pos;
11038 QualType ParmTy;
11039 if (isa<CXXThisExpr>(E)) {
11040 Pos = ParamPositions[FD];
11041 ParmTy = E->getType();
11042 } else {
11043 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11044 ->getCanonicalDecl();
11045 auto It = ParamPositions.find(PVD);
11046 assert(It != ParamPositions.end() && "Function parameter not found");
11047 Pos = It->second;
11048 ParmTy = PVD->getType();
11049 }
11050 ParamAttrs[Pos].Alignment =
11051 (*NI)
11052 ? (*NI)->EvaluateKnownConstInt(C)
11053 : llvm::APSInt::getUnsigned(
11054 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11055 .getQuantity());
11056 ++NI;
11057 }
11058 // Mark linear parameters.
11059 auto *SI = Attr->steps_begin();
11060 auto *MI = Attr->modifiers_begin();
11061 for (const Expr *E : Attr->linears()) {
11062 E = E->IgnoreParenImpCasts();
11063 unsigned Pos;
11064 bool IsReferenceType = false;
11065 // Rescaling factor needed to compute the linear parameter
11066 // value in the mangled name.
11067 unsigned PtrRescalingFactor = 1;
11068 if (isa<CXXThisExpr>(E)) {
11069 Pos = ParamPositions[FD];
11070 auto *P = cast<PointerType>(E->getType());
11071 PtrRescalingFactor = CGM.getContext()
11072 .getTypeSizeInChars(P->getPointeeType())
11073 .getQuantity();
11074 } else {
11075 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11076 ->getCanonicalDecl();
11077 auto It = ParamPositions.find(PVD);
11078 assert(It != ParamPositions.end() && "Function parameter not found");
11079 Pos = It->second;
11080 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11081 PtrRescalingFactor = CGM.getContext()
11082 .getTypeSizeInChars(P->getPointeeType())
11083 .getQuantity();
11084 else if (PVD->getType()->isReferenceType()) {
11085 IsReferenceType = true;
11086 PtrRescalingFactor =
11087 CGM.getContext()
11088 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11089 .getQuantity();
11090 }
11091 }
11092 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11093 if (*MI == OMPC_LINEAR_ref)
11094 ParamAttr.Kind = LinearRef;
11095 else if (*MI == OMPC_LINEAR_uval)
11096 ParamAttr.Kind = LinearUVal;
11097 else if (IsReferenceType)
11098 ParamAttr.Kind = LinearVal;
11099 else
11100 ParamAttr.Kind = Linear;
11101 // Assuming a stride of 1, for `linear` without modifiers.
11102 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11103 if (*SI) {
11104 Expr::EvalResult Result;
11105 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11106 if (const auto *DRE =
11107 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11108 if (const auto *StridePVD =
11109 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11110 ParamAttr.HasVarStride = true;
11111 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11112 assert(It != ParamPositions.end() &&
11113 "Function parameter not found");
11114 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11115 }
11116 }
11117 } else {
11118 ParamAttr.StrideOrArg = Result.Val.getInt();
11119 }
11120 }
11121 // If we are using a linear clause on a pointer, we need to
11122 // rescale the value of linear_step with the byte size of the
11123 // pointee type.
11124 if (!ParamAttr.HasVarStride &&
11125 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11126 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11127 ++SI;
11128 ++MI;
11129 }
11130 llvm::APSInt VLENVal;
11131 SourceLocation ExprLoc;
11132 const Expr *VLENExpr = Attr->getSimdlen();
11133 if (VLENExpr) {
11134 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11135 ExprLoc = VLENExpr->getExprLoc();
11136 }
11137 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11138 if (CGM.getTriple().isX86()) {
11139 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11140 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11141 unsigned VLEN = VLENVal.getExtValue();
11142 StringRef MangledName = Fn->getName();
11143 if (CGM.getTarget().hasFeature("sve"))
11144 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11145 MangledName, 's', 128, Fn, ExprLoc);
11146 else if (CGM.getTarget().hasFeature("neon"))
11147 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11148 MangledName, 'n', 128, Fn, ExprLoc);
11149 }
11150 }
11151 FD = FD->getPreviousDecl();
11152 }
11153}
11154
11155namespace {
11156/// Cleanup action for doacross support.
11157class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11158public:
11159 static const int DoacrossFinArgs = 2;
11160
11161private:
11162 llvm::FunctionCallee RTLFn;
11163 llvm::Value *Args[DoacrossFinArgs];
11164
11165public:
11166 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11167 ArrayRef<llvm::Value *> CallArgs)
11168 : RTLFn(RTLFn) {
11169 assert(CallArgs.size() == DoacrossFinArgs);
11170 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11171 }
11172 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11173 if (!CGF.HaveInsertPoint())
11174 return;
11175 CGF.EmitRuntimeCall(RTLFn, Args);
11176 }
11177};
11178} // namespace
11179
11180void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11181 const OMPLoopDirective &D,
11182 ArrayRef<Expr *> NumIterations) {
11183 if (!CGF.HaveInsertPoint())
11184 return;
11185
11186 ASTContext &C = CGM.getContext();
11187 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11188 RecordDecl *RD;
11189 if (KmpDimTy.isNull()) {
11190 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11191 // kmp_int64 lo; // lower
11192 // kmp_int64 up; // upper
11193 // kmp_int64 st; // stride
11194 // };
11195 RD = C.buildImplicitRecord("kmp_dim");
11196 RD->startDefinition();
11197 addFieldToRecordDecl(C, RD, Int64Ty);
11198 addFieldToRecordDecl(C, RD, Int64Ty);
11199 addFieldToRecordDecl(C, RD, Int64Ty);
11200 RD->completeDefinition();
11201 KmpDimTy = C.getRecordType(RD);
11202 } else {
11203 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11204 }
11205 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11206 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11207 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
11208
11209 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11210 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11211 enum { LowerFD = 0, UpperFD, StrideFD };
11212 // Fill dims with data.
11213 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11214 LValue DimsLVal = CGF.MakeAddrLValue(
11215 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11216 // dims.upper = num_iterations;
11217 LValue UpperLVal = CGF.EmitLValueForField(
11218 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11219 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11220 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11221 Int64Ty, NumIterations[I]->getExprLoc());
11222 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11223 // dims.stride = 1;
11224 LValue StrideLVal = CGF.EmitLValueForField(
11225 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11226 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11227 StrideLVal);
11228 }
11229
11230 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11231 // kmp_int32 num_dims, struct kmp_dim * dims);
11232 llvm::Value *Args[] = {
11233 emitUpdateLocation(CGF, D.getBeginLoc()),
11234 getThreadID(CGF, D.getBeginLoc()),
11235 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11236 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11237 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11238 CGM.VoidPtrTy)};
11239
11240 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11241 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11242 CGF.EmitRuntimeCall(RTLFn, Args);
11243 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11244 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11245 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11246 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11247 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11248 llvm::ArrayRef(FiniArgs));
11249}
11250
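// Emits the runtime call for one 'ordered' doacross dependence: a 'source'
// clause lowers to __kmpc_doacross_post, a 'sink' clause to
// __kmpc_doacross_wait, each receiving the packed vector of loop counters.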
11251template <typename T>
11252static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11253 const T *C, llvm::Value *ULoc,
11254 llvm::Value *ThreadID) {
11255 QualType Int64Ty =
11256 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11257 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11258 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11259 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11260 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11261 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11262 const Expr *CounterVal = C->getLoopData(I);
11263 assert(CounterVal);
11264 llvm::Value *CntVal = CGF.EmitScalarConversion(
11265 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11266 CounterVal->getExprLoc());
11267 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11268 /*Volatile=*/false, Int64Ty);
11269 }
11270 llvm::Value *Args[] = {
11271 ULoc, ThreadID,
11272 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11273 llvm::FunctionCallee RTLFn;
11274 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11275 OMPDoacrossKind<T> ODK;
11276 if (ODK.isSource(C)) {
11277 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11278 OMPRTL___kmpc_doacross_post);
11279 } else {
11280 assert(ODK.isSink(C) && "Expect sink modifier.");
11281 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11282 OMPRTL___kmpc_doacross_wait);
11283 }
11284 CGF.EmitRuntimeCall(RTLFn, Args);
11285}
11286
11287void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11288 const OMPDependClause *C) {
11289 return EmitDoacrossOrdered<OMPDependClause>(
11290 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11291 getThreadID(CGF, C->getBeginLoc()));
11292}
11293
11294void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11295 const OMPDoacrossClause *C) {
11296 return EmitDoacrossOrdered<OMPDoacrossClause>(
11297 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11298 getThreadID(CGF, C->getBeginLoc()));
11299}
11300
11301void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11302 llvm::FunctionCallee Callee,
11303 ArrayRef<llvm::Value *> Args) const {
11304 assert(Loc.isValid() && "Outlined function call location must be valid.");
11305 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11306
11307 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11308 if (Fn->doesNotThrow()) {
11309 CGF.EmitNounwindRuntimeCall(Fn, Args);
11310 return;
11311 }
11312 }
11313 CGF.EmitRuntimeCall(Callee, Args);
11314}
11315
11316void CGOpenMPRuntime::emitOutlinedFunctionCall(
11317 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11318 ArrayRef<llvm::Value *> Args) const {
11319 emitCall(CGF, Loc, OutlinedFn, Args);
11320}
11321
11322void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11323 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11324 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11325 CGM.getOpenMPRuntime().emitDeclareTargetFunction(FD, CGF.CurFn);
11326}
11327
11328Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11329 const VarDecl *NativeParam,
11330 const VarDecl *TargetParam) const {
11331 return CGF.GetAddrOfLocalVar(NativeParam);
11332}
11333
11334/// Return allocator value from expression, or return a null allocator (default
11335/// when no allocator specified).
11336static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11337 const Expr *Allocator) {
11338 llvm::Value *AllocVal;
11339 if (Allocator) {
11340 AllocVal = CGF.EmitScalarExpr(Allocator);
11341 // According to the standard, the original allocator type is an enum
11342 // (integer). Convert to pointer type, if required.
11343 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11344 CGF.getContext().VoidPtrTy,
11345 Allocator->getExprLoc());
11346 } else {
11347 // If no allocator specified, it defaults to the null allocator.
11348 AllocVal = llvm::Constant::getNullValue(
11349 CGF.CGM.getTypes().ConvertTypeForMem(CGF.getContext().VoidPtrTy));
11350 }
11351 return AllocVal;
11352}
11353
11354/// Return the alignment from an allocate directive if present.
11355static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11356 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11357
11358 if (!AllocateAlignment)
11359 return nullptr;
11360
11361 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11362}
11363
11364Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11365 const VarDecl *VD) {
11366 if (!VD)
11367 return Address::invalid();
11368 Address UntiedAddr = Address::invalid();
11369 Address UntiedRealAddr = Address::invalid();
11370 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11371 if (It != FunctionToUntiedTaskStackMap.end()) {
11372 const UntiedLocalVarsAddressesMap &UntiedData =
11373 UntiedLocalVarsStack[It->second];
11374 auto I = UntiedData.find(VD);
11375 if (I != UntiedData.end()) {
11376 UntiedAddr = I->second.first;
11377 UntiedRealAddr = I->second.second;
11378 }
11379 }
11380 const VarDecl *CVD = VD->getCanonicalDecl();
11381 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11382 // Use the default allocation.
11383 if (!isAllocatableDecl(VD))
11384 return UntiedAddr;
11385 llvm::Value *Size;
11386 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11387 if (CVD->getType()->isVariablyModifiedType()) {
11388 Size = CGF.getTypeSize(CVD->getType());
11389 // Align the size: ((size + align - 1) / align) * align
11390 Size = CGF.Builder.CreateNUWAdd(
11391 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11392 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11393 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
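// e.g., size 10 with align 8: ((10 + 7) / 8) * 8 == 16.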
11394 } else {
11395 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11396 Size = CGM.getSize(Sz.alignTo(Align));
11397 }
11398 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11399 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11400 const Expr *Allocator = AA->getAllocator();
11401 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11402 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11403 llvm::SmallVector<llvm::Value *, 4> Args;
11404 Args.push_back(ThreadID);
11405 if (Alignment)
11406 Args.push_back(Alignment);
11407 Args.push_back(Size);
11408 Args.push_back(AllocVal);
11409 llvm::omp::RuntimeFunction FnID =
11410 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11411 llvm::Value *Addr = CGF.EmitRuntimeCall(
11412 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11413 getName({CVD->getName(), ".void.addr"}));
11414 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11415 CGM.getModule(), OMPRTL___kmpc_free);
11416 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11417 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11418 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11419 if (UntiedAddr.isValid())
11420 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11421
11422 // Cleanup action for allocate support.
11423 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11424 llvm::FunctionCallee RTLFn;
11425 SourceLocation::UIntTy LocEncoding;
11426 Address Addr;
11427 const Expr *AllocExpr;
11428
11429 public:
11430 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11431 SourceLocation::UIntTy LocEncoding, Address Addr,
11432 const Expr *AllocExpr)
11433 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11434 AllocExpr(AllocExpr) {}
11435 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11436 if (!CGF.HaveInsertPoint())
11437 return;
11438 llvm::Value *Args[3];
11439 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11440 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11441 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11442 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11443 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11444 Args[2] = AllocVal;
11445 CGF.EmitRuntimeCall(RTLFn, Args);
11446 }
11447 };
11448 Address VDAddr =
11449 UntiedRealAddr.isValid()
11450 ? UntiedRealAddr
11451 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11452 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11453 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11454 VDAddr, Allocator);
11455 if (UntiedRealAddr.isValid())
11456 if (auto *Region =
11457 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11458 Region->emitUntiedSwitch(CGF);
11459 return VDAddr;
11460 }
11461 return UntiedAddr;
11462}
11463
11464bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11465 const VarDecl *VD) const {
11466 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11467 if (It == FunctionToUntiedTaskStackMap.end())
11468 return false;
11469 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11470}
11471
11472CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11473 CodeGenModule &CGM, const OMPExecutableDirective &S)
11474 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11475 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11476 if (!NeedToPush)
11477 return;
11478 NontemporalDeclsSet &DS =
11479 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11480 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11481 for (const Stmt *Ref : C->private_refs()) {
11482 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11483 const ValueDecl *VD;
11484 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11485 VD = DRE->getDecl();
11486 } else {
11487 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11488 assert((ME->isImplicitCXXThis() ||
11489 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11490 "Expected member of current class.");
11491 VD = ME->getMemberDecl();
11492 }
11493 DS.insert(VD);
11494 }
11495 }
11496}
11497
11498CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11499 if (!NeedToPush)
11500 return;
11501 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11502}
11503
11504CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11505 CodeGenFunction &CGF,
11506 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11507 std::pair<Address, Address>> &LocalVars)
11508 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11509 if (!NeedToPush)
11510 return;
11511 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11512 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11513 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11514}
11515
11516CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11517 if (!NeedToPush)
11518 return;
11519 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11520}
11521
11522bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11523 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11524
11525 return llvm::any_of(
11526 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11527 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11528}
11529
11530void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11531 const OMPExecutableDirective &S,
11532 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11533 const {
11534 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11535 // Vars in target/task regions must be excluded completely.
11536 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11537 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11538 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11539 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11540 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11541 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11542 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11543 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11544 }
11545 }
11546 // Exclude vars in private clauses.
11547 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11548 for (const Expr *Ref : C->varlist()) {
11549 if (!Ref->getType()->isScalarType())
11550 continue;
11551 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11552 if (!DRE)
11553 continue;
11554 NeedToCheckForLPCs.insert(DRE->getDecl());
11555 }
11556 }
11557 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11558 for (const Expr *Ref : C->varlist()) {
11559 if (!Ref->getType()->isScalarType())
11560 continue;
11561 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11562 if (!DRE)
11563 continue;
11564 NeedToCheckForLPCs.insert(DRE->getDecl());
11565 }
11566 }
11567 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11568 for (const Expr *Ref : C->varlist()) {
11569 if (!Ref->getType()->isScalarType())
11570 continue;
11571 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11572 if (!DRE)
11573 continue;
11574 NeedToCheckForLPCs.insert(DRE->getDecl());
11575 }
11576 }
11577 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11578 for (const Expr *Ref : C->varlist()) {
11579 if (!Ref->getType()->isScalarType())
11580 continue;
11581 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11582 if (!DRE)
11583 continue;
11584 NeedToCheckForLPCs.insert(DRE->getDecl());
11585 }
11586 }
11587 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11588 for (const Expr *Ref : C->varlist()) {
11589 if (!Ref->getType()->isScalarType())
11590 continue;
11591 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11592 if (!DRE)
11593 continue;
11594 NeedToCheckForLPCs.insert(DRE->getDecl());
11595 }
11596 }
11597 for (const Decl *VD : NeedToCheckForLPCs) {
11598 for (const LastprivateConditionalData &Data :
11599 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11600 if (Data.DeclToUniqueName.count(VD) > 0) {
11601 if (!Data.Disabled)
11602 NeedToAddForLPCsAsDisabled.insert(VD);
11603 break;
11604 }
11605 }
11606 }
11607}
11608
11609CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11610 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11611 : CGM(CGF.CGM),
11612 Action((CGM.getLangOpts().OpenMP >= 50 &&
11613 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11614 [](const OMPLastprivateClause *C) {
11615 return C->getKind() ==
11616 OMPC_LASTPRIVATE_conditional;
11617 }))
11618 ? ActionToDo::PushAsLastprivateConditional
11619 : ActionToDo::DoNotPush) {
11620 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11621 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11622 return;
11623 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11624 "Expected a push action.");
11625 LastprivateConditionalData &Data =
11626 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11627 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11628 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11629 continue;
11630
11631 for (const Expr *Ref : C->varlist()) {
11632 Data.DeclToUniqueName.insert(std::make_pair(
11633 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11634 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11635 }
11636 }
11637 Data.IVLVal = IVLVal;
11638 Data.Fn = CGF.CurFn;
11639}
11640
11641CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11642 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11643 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11644 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11645 if (CGM.getLangOpts().OpenMP < 50)
11646 return;
11647 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11648 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11649 if (!NeedToAddForLPCsAsDisabled.empty()) {
11650 Action = ActionToDo::DisableLastprivateConditional;
11651 LastprivateConditionalData &Data =
11652 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11653 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11654 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
11655 Data.Fn = CGF.CurFn;
11656 Data.Disabled = true;
11657 }
11658}
11659
11660CGOpenMPRuntime::LastprivateConditionalRAII
11661CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11662 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11663 return LastprivateConditionalRAII(CGF, S);
11664}
11665
11666CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11667 if (CGM.getLangOpts().OpenMP < 50)
11668 return;
11669 if (Action == ActionToDo::DisableLastprivateConditional) {
11670 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11671 "Expected list of disabled private vars.");
11672 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11673 }
11674 if (Action == ActionToDo::PushAsLastprivateConditional) {
11675 assert(
11676 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11677 "Expected list of lastprivate conditional vars.");
11678 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11679 }
11680}
11681
11682Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11683 const VarDecl *VD) {
11684 ASTContext &C = CGM.getContext();
11685 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
11686 if (I == LastprivateConditionalToTypes.end())
11687 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11688 QualType NewType;
11689 const FieldDecl *VDField;
11690 const FieldDecl *FiredField;
11691 LValue BaseLVal;
11692 auto VI = I->getSecond().find(VD);
11693 if (VI == I->getSecond().end()) {
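// First use in this function: build an implicit record holding a copy slot
// for the variable plus a char "Fired" flag, so inner regions can signal that
// the variable was written.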
11694 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11695 RD->startDefinition();
11696 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11697 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11698 RD->completeDefinition();
11699 NewType = C.getRecordType(RD);
11700 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11701 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11702 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11703 } else {
11704 NewType = std::get<0>(VI->getSecond());
11705 VDField = std::get<1>(VI->getSecond());
11706 FiredField = std::get<2>(VI->getSecond());
11707 BaseLVal = std::get<3>(VI->getSecond());
11708 }
11709 LValue FiredLVal =
11710 CGF.EmitLValueForField(BaseLVal, FiredField);
11711 CGF.EmitStoreOfScalar(
11712 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11713 FiredLVal);
11714 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
11715}
11716
11717namespace {
11718/// Checks if the lastprivate conditional variable is referenced in LHS.
11719class LastprivateConditionalRefChecker final
11720 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11721 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11722 const Expr *FoundE = nullptr;
11723 const Decl *FoundD = nullptr;
11724 StringRef UniqueDeclName;
11725 LValue IVLVal;
11726 llvm::Function *FoundFn = nullptr;
11727 SourceLocation Loc;
11728
11729public:
11730 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11731 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11732 llvm::reverse(LPM)) {
11733 auto It = D.DeclToUniqueName.find(E->getDecl());
11734 if (It == D.DeclToUniqueName.end())
11735 continue;
11736 if (D.Disabled)
11737 return false;
11738 FoundE = E;
11739 FoundD = E->getDecl()->getCanonicalDecl();
11740 UniqueDeclName = It->second;
11741 IVLVal = D.IVLVal;
11742 FoundFn = D.Fn;
11743 break;
11744 }
11745 return FoundE == E;
11746 }
11747 bool VisitMemberExpr(const MemberExpr *E) {
11748 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11749 return false;
11750 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11751 llvm::reverse(LPM)) {
11752 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11753 if (It == D.DeclToUniqueName.end())
11754 continue;
11755 if (D.Disabled)
11756 return false;
11757 FoundE = E;
11758 FoundD = E->getMemberDecl()->getCanonicalDecl();
11759 UniqueDeclName = It->second;
11760 IVLVal = D.IVLVal;
11761 FoundFn = D.Fn;
11762 break;
11763 }
11764 return FoundE == E;
11765 }
11766 bool VisitStmt(const Stmt *S) {
11767 for (const Stmt *Child : S->children()) {
11768 if (!Child)
11769 continue;
11770 if (const auto *E = dyn_cast<Expr>(Child))
11771 if (!E->isGLValue())
11772 continue;
11773 if (Visit(Child))
11774 return true;
11775 }
11776 return false;
11777 }
11778 explicit LastprivateConditionalRefChecker(
11779 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11780 : LPM(LPM) {}
11781 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11782 getFoundData() const {
11783 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11784 }
11785};
11786} // namespace
11787
11788void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11789 LValue IVLVal,
11790 StringRef UniqueDeclName,
11791 LValue LVal,
11792 SourceLocation Loc) {
11793 // Last updated loop counter for the lastprivate conditional var.
11794 // int<xx> last_iv = 0;
11795 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11796 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11797 LLIVTy, getName({UniqueDeclName, "iv"}));
11798 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11799 IVLVal.getAlignment().getAsAlign());
11800 LValue LastIVLVal =
11801 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
11802
11803 // Last value of the lastprivate conditional.
11804 // decltype(priv_a) last_a;
11805 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11806 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11807 cast<llvm::GlobalVariable>(Last)->setAlignment(
11808 LVal.getAlignment().getAsAlign());
11809 LValue LastLVal =
11810 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11811
11812 // Global loop counter. Required to handle inner parallel-for regions.
11813 // iv
11814 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11815
11816 // #pragma omp critical(a)
11817 // if (last_iv <= iv) {
11818 // last_iv = iv;
11819 // last_a = priv_a;
11820 // }
11821 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11822 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11823 Action.Enter(CGF);
11824 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11825 // If the variable was updated (last_iv <= iv), store the new
11826 // value in the global var.
11827 llvm::Value *CmpRes;
11828 if (IVLVal.getType()->isSignedIntegerType()) {
11829 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11830 } else {
11831 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11832 "Loop iteration variable must be integer.");
11833 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11834 }
11835 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11836 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11837 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11838 // {
11839 CGF.EmitBlock(ThenBB);
11840
11841 // last_iv = iv;
11842 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11843
11844 // last_a = priv_a;
11845 switch (CGF.getEvaluationKind(LVal.getType())) {
11846 case TEK_Scalar: {
11847 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11848 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11849 break;
11850 }
11851 case TEK_Complex: {
11852 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11853 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11854 break;
11855 }
11856 case TEK_Aggregate:
11857 llvm_unreachable(
11858 "Aggregates are not supported in lastprivate conditional.");
11859 }
11860 // }
11861 CGF.EmitBranch(ExitBB);
11862 // There is no need to emit line number for unconditional branch.
11863 (void)ApplyDebugLocation::CreateEmpty(CGF);
11864 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11865 };
11866
11867 if (CGM.getLangOpts().OpenMPSimd) {
11868 // Do not emit as a critical region as no parallel region could be emitted.
11869 RegionCodeGenTy ThenRCG(CodeGen);
11870 ThenRCG(CGF);
11871 } else {
11872 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11873 }
11874}
11875
11876void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11877 const Expr *LHS) {
11878 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11879 return;
11880 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11881 if (!Checker.Visit(LHS))
11882 return;
11883 const Expr *FoundE;
11884 const Decl *FoundD;
11885 StringRef UniqueDeclName;
11886 LValue IVLVal;
11887 llvm::Function *FoundFn;
11888 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
11889 Checker.getFoundData();
11890 if (FoundFn != CGF.CurFn) {
11891 // Special codegen for inner parallel regions.
11892 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11893 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
11894 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11895 "Lastprivate conditional is not found in outer region.");
11896 QualType StructTy = std::get<0>(It->getSecond());
11897 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
11898 LValue PrivLVal = CGF.EmitLValue(FoundE);
11899 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11900 PrivLVal.getAddress(),
11901 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
11902 CGF.ConvertTypeForMem(StructTy));
11903 LValue BaseLVal =
11904 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
11905 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
11906 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11907 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
11908 FiredLVal, llvm::AtomicOrdering::Unordered,
11909 /*IsVolatile=*/true, /*isInit=*/false);
11910 return;
11911 }
11912
11913 // Private address of the lastprivate conditional in the current context.
11914 // priv_a
11915 LValue LVal = CGF.EmitLValue(FoundE);
11916 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11917 FoundE->getExprLoc());
11918}
11919
11920void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11921 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11922 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11923 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11924 return;
11925 auto Range = llvm::reverse(LastprivateConditionalStack);
11926 auto It = llvm::find_if(
11927 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
11928 if (It == Range.end() || It->Fn != CGF.CurFn)
11929 return;
11930 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
11931 assert(LPCI != LastprivateConditionalToTypes.end() &&
11932 "Lastprivates must be registered already.");
11933 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11934 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11935 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11936 for (const auto &Pair : It->DeclToUniqueName) {
11937 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
11938 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
11939 continue;
11940 auto I = LPCI->getSecond().find(Pair.first);
11941 assert(I != LPCI->getSecond().end() &&
11942 "Lastprivate must be registered already.");
11943 // bool Cmp = priv_a.Fired != 0;
11944 LValue BaseLVal = std::get<3>(I->getSecond());
11945 LValue FiredLVal =
11946 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
11947 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
11948 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
11949 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
11950 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
11951 // if (Cmp) {
11952 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
11953 CGF.EmitBlock(ThenBB);
11954 Address Addr = CGF.GetAddrOfLocalVar(VD);
11955 LValue LVal;
11956 if (VD->getType()->isReferenceType())
11957 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11958 AlignmentSource::Decl);
11959 else
11960 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11961 AlignmentSource::Decl);
11962 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
11963 D.getBeginLoc());
11964 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11965 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
11966 // }
11967 }
11968}
11969
11970void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11971 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11972 SourceLocation Loc) {
11973 if (CGF.getLangOpts().OpenMP < 50)
11974 return;
11975 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11976 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11977 "Unknown lastprivate conditional variable.");
11978 StringRef UniqueName = It->second;
11979 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11980 // The variable was not updated in the region - exit.
11981 if (!GV)
11982 return;
11983 LValue LPLVal = CGF.MakeRawAddrLValue(
11984 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11985 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11986 CGF.EmitStoreOfScalar(Res, PrivLVal);
11987}
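Taken together, the three lastprivate-conditional hooks implement a small protocol; a rough summary for lastprivate(conditional: a), with __last_a standing in for the uniquely named global (the real name comes from the UniqueDeclName bookkeeping, so __last_a is purely illustrative):
// 1. On each conditional assignment: set the 'Fired' flag and, when the
//    iteration is the latest seen so far, store the value into __last_a.
// 2. At the end of an inner region: if priv_a.Fired != 0, refresh __last_a
//    (checkAndEmitSharedLastprivateConditional above).
// 3. Final update (this function): if __last_a was ever created, load it
//    and store the result back into the original variable 'a'.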
11988
11989llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
11990 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11991 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11992 const RegionCodeGenTy &CodeGen) {
11993 llvm_unreachable("Not supported in SIMD-only mode");
11994}
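CGOpenMPSIMDRuntime backs clang's -fopenmp-simd mode: only the simd-related parts of OpenMP are honored and no libomp calls are ever emitted, so every entry point that would need the runtime is stubbed out with llvm_unreachable, as in the function above and the long run of overrides that follows. A minimal sketch of code this mode still accepts:
// Built with: clang -O2 -fopenmp-simd t.c   (no OpenMP runtime needed)
void axpy(int n, float a, const float *x, float *y) {
#pragma omp simd
  for (int i = 0; i < n; ++i)
    y[i] += a * x[i]; // 'simd' still drives vectorization; directives such
                      // as 'parallel' are ignored rather than outlined, so
                      // these stubs cannot be reached from valid input
}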
11995
11996llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
11997 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11998 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
11999 const RegionCodeGenTy &CodeGen) {
12000 llvm_unreachable("Not supported in SIMD-only mode");
12001}
12002
12003llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12004 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12005 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12006 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12007 bool Tied, unsigned &NumberOfParts) {
12008 llvm_unreachable("Not supported in SIMD-only mode");
12009}
12010
12011void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12012 SourceLocation Loc,
12013 llvm::Function *OutlinedFn,
12014 ArrayRef<llvm::Value *> CapturedVars,
12015 const Expr *IfCond,
12016 llvm::Value *NumThreads) {
12017 llvm_unreachable("Not supported in SIMD-only mode");
12018}
12019
12020void CGOpenMPSIMDRuntime::emitCriticalRegion(
12021 CodeGenFunction &CGF, StringRef CriticalName,
12022 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12023 const Expr *Hint) {
12024 llvm_unreachable("Not supported in SIMD-only mode");
12025}
12026
12027void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12028 const RegionCodeGenTy &MasterOpGen,
12029 SourceLocation Loc) {
12030 llvm_unreachable("Not supported in SIMD-only mode");
12031}
12032
12033void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12034 const RegionCodeGenTy &MaskedOpGen,
12035 SourceLocation Loc,
12036 const Expr *Filter) {
12037 llvm_unreachable("Not supported in SIMD-only mode");
12038}
12039
12040void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12041 SourceLocation Loc) {
12042 llvm_unreachable("Not supported in SIMD-only mode");
12043}
12044
12045void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12046 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12047 SourceLocation Loc) {
12048 llvm_unreachable("Not supported in SIMD-only mode");
12049}
12050
12051void CGOpenMPSIMDRuntime::emitSingleRegion(
12052 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12053 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12054 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12055 ArrayRef<const Expr *> AssignmentOps) {
12056 llvm_unreachable("Not supported in SIMD-only mode");
12057}
12058
12059void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12060 const RegionCodeGenTy &OrderedOpGen,
12061 SourceLocation Loc,
12062 bool IsThreads) {
12063 llvm_unreachable("Not supported in SIMD-only mode");
12064}
12065
12066void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12067 SourceLocation Loc,
12068 OpenMPDirectiveKind Kind,
12069 bool EmitChecks,
12070 bool ForceSimpleCall) {
12071 llvm_unreachable("Not supported in SIMD-only mode");
12072}
12073
12074void CGOpenMPSIMDRuntime::emitForDispatchInit(
12075 CodeGenFunction &CGF, SourceLocation Loc,
12076 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12077 bool Ordered, const DispatchRTInput &DispatchValues) {
12078 llvm_unreachable("Not supported in SIMD-only mode");
12079}
12080
12081void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
12082 SourceLocation Loc) {
12083 llvm_unreachable("Not supported in SIMD-only mode");
12084}
12085
12086void CGOpenMPSIMDRuntime::emitForStaticInit(
12087 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12088 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12089 llvm_unreachable("Not supported in SIMD-only mode");
12090}
12091
12092void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12093 CodeGenFunction &CGF, SourceLocation Loc,
12094 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12095 llvm_unreachable("Not supported in SIMD-only mode");
12096}
12097
12098void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12099 SourceLocation Loc,
12100 unsigned IVSize,
12101 bool IVSigned) {
12102 llvm_unreachable("Not supported in SIMD-only mode");
12103}
12104
12105void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12106 SourceLocation Loc,
12107 OpenMPDirectiveKind DKind) {
12108 llvm_unreachable("Not supported in SIMD-only mode");
12109}
12110
12111llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12112 SourceLocation Loc,
12113 unsigned IVSize, bool IVSigned,
12114 Address IL, Address LB,
12115 Address UB, Address ST) {
12116 llvm_unreachable("Not supported in SIMD-only mode");
12117}
12118
12119void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12120 llvm::Value *NumThreads,
12121 SourceLocation Loc) {
12122 llvm_unreachable("Not supported in SIMD-only mode");
12123}
12124
12125void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12126 ProcBindKind ProcBind,
12127 SourceLocation Loc) {
12128 llvm_unreachable("Not supported in SIMD-only mode");
12129}
12130
12131Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12132 const VarDecl *VD,
12133 Address VDAddr,
12134 SourceLocation Loc) {
12135 llvm_unreachable("Not supported in SIMD-only mode");
12136}
12137
12138llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12139 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12140 CodeGenFunction *CGF) {
12141 llvm_unreachable("Not supported in SIMD-only mode");
12142}
12143
12144Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12145 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12146 llvm_unreachable("Not supported in SIMD-only mode");
12147}
12148
12149void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12150 ArrayRef<const Expr *> Vars,
12151 SourceLocation Loc,
12152 llvm::AtomicOrdering AO) {
12153 llvm_unreachable("Not supported in SIMD-only mode");
12154}
12155
12156void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12157 const OMPExecutableDirective &D,
12158 llvm::Function *TaskFunction,
12159 QualType SharedsTy, Address Shareds,
12160 const Expr *IfCond,
12161 const OMPTaskDataTy &Data) {
12162 llvm_unreachable("Not supported in SIMD-only mode");
12163}
12164
12165void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12166 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12167 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12168 const Expr *IfCond, const OMPTaskDataTy &Data) {
12169 llvm_unreachable("Not supported in SIMD-only mode");
12170}
12171
12172void CGOpenMPSIMDRuntime::emitReduction(
12173 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12174 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12175 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12176 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12177 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12178 ReductionOps, Options);
12179}
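This override is the one SIMD-only entry point that still generates code: a simple reduction needs no runtime support, so it delegates to the base CGOpenMPRuntime::emitReduction (the assert enforces Options.SimpleReduction). For example:
double dot(int n, const double *x, const double *y) {
  double sum = 0.0;
#pragma omp simd reduction(+ : sum)
  for (int i = 0; i < n; ++i)
    sum += x[i] * y[i];
  return sum; // partials are combined without any __kmpc_* calls
}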
12180
12181llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12182 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12183 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12184 llvm_unreachable("Not supported in SIMD-only mode");
12185}
12186
12187void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12188 SourceLocation Loc,
12189 bool IsWorksharingReduction) {
12190 llvm_unreachable("Not supported in SIMD-only mode");
12191}
12192
12193void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12194 SourceLocation Loc,
12195 ReductionCodeGen &RCG,
12196 unsigned N) {
12197 llvm_unreachable("Not supported in SIMD-only mode");
12198}
12199
12200Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12201 SourceLocation Loc,
12202 llvm::Value *ReductionsPtr,
12203 LValue SharedLVal) {
12204 llvm_unreachable("Not supported in SIMD-only mode");
12205}
12206
12207void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12208 SourceLocation Loc,
12209 const OMPTaskDataTy &Data) {
12210 llvm_unreachable("Not supported in SIMD-only mode");
12211}
12212
12213void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12214 CodeGenFunction &CGF, SourceLocation Loc,
12215 OpenMPDirectiveKind CancelRegion) {
12216 llvm_unreachable("Not supported in SIMD-only mode");
12217}
12218
12219void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12220 SourceLocation Loc, const Expr *IfCond,
12221 OpenMPDirectiveKind CancelRegion) {
12222 llvm_unreachable("Not supported in SIMD-only mode");
12223}
12224
12225void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12226 const OMPExecutableDirective &D, StringRef ParentName,
12227 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12228 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12229 llvm_unreachable("Not supported in SIMD-only mode");
12230}
12231
12232void CGOpenMPSIMDRuntime::emitTargetCall(
12233 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12234 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12235 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12236 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12237 const OMPLoopDirective &D)>
12238 SizeEmitter) {
12239 llvm_unreachable("Not supported in SIMD-only mode");
12240}
12241
12242bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12243 llvm_unreachable("Not supported in SIMD-only mode");
12244}
12245
12246bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12247 llvm_unreachable("Not supported in SIMD-only mode");
12248}
12249
12250bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12251 return false;
12252}
12253
12254void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12255 const OMPExecutableDirective &D,
12256 SourceLocation Loc,
12257 llvm::Function *OutlinedFn,
12258 ArrayRef<llvm::Value *> CapturedVars) {
12259 llvm_unreachable("Not supported in SIMD-only mode");
12260}
12261
12262void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12263 const Expr *NumTeams,
12264 const Expr *ThreadLimit,
12265 SourceLocation Loc) {
12266 llvm_unreachable("Not supported in SIMD-only mode");
12267}
12268
12269void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12270 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12271 const Expr *Device, const RegionCodeGenTy &CodeGen,
12272 CGOpenMPRuntime::TargetDataInfo &Info) {
12273 llvm_unreachable("Not supported in SIMD-only mode");
12274}
12275
12276void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12277 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12278 const Expr *Device) {
12279 llvm_unreachable("Not supported in SIMD-only mode");
12280}
12281
12282void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12283 const OMPLoopDirective &D,
12284 ArrayRef<Expr *> NumIterations) {
12285 llvm_unreachable("Not supported in SIMD-only mode");
12286}
12287
12288void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12289 const OMPDependClause *C) {
12290 llvm_unreachable("Not supported in SIMD-only mode");
12291}
12292
12293void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12294 const OMPDoacrossClause *C) {
12295 llvm_unreachable("Not supported in SIMD-only mode");
12296}
12297
12298const VarDecl *
12299CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12300 const VarDecl *NativeParam) const {
12301 llvm_unreachable("Not supported in SIMD-only mode");
12302}
12303
12304Address
12305CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12306 const VarDecl *NativeParam,
12307 const VarDecl *TargetParam) const {
12308 llvm_unreachable("Not supported in SIMD-only mode");
12309}
#define V(N, I)
Definition: ASTContext.h:3341
StringRef P
#define SM(sm)
Definition: Cuda.cpp:83
Provides LLVM's BitmaskEnum facility to enumeration types declared in namespace clang.
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
llvm::Value * emitDynCGGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
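Illustrative user code (not from this file) that needs this per-element combine: reducing over an array section turns every element into an independent 'LHS op= RHS' update (hist, nbins, and bin() are placeholders):
#pragma omp parallel for reduction(+ : hist[0:nbins])
for (int i = 0; i < n; ++i)
  hist[bin(i)] += 1; // each element of hist[0:nbins] is reduced separately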
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
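For instance, a clause such as the one below has to be mapped onto the chunked dynamic enumerator, while schedule(static) without a chunk maps to the non-chunked static variant (the enumerator names are internal to this file; work() is a placeholder):
#pragma omp for schedule(dynamic, 4) // chunked dynamic runtime schedule
for (int i = 0; i < n; ++i)
  work(i);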
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, const Expr *Ref)
Generates unique name for artificial threadprivate variables.
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
const Decl * D
Expr * E
enum clang::sema::@1658::IndirectLocalPathEntry::EntryKind Kind
Defines the clang::FileManager interface and associated types.
int Priority
Definition: Format.cpp:3005
#define X(type, name)
Definition: Value.h:143
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
SourceRange Range
Definition: SemaObjC.cpp:757
SourceLocation Loc
Definition: SemaObjC.cpp:758
Defines the SourceManager interface.
const char * Data
This file defines OpenMP AST classes for executable directives and clauses.
SourceLocation Begin
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:187
SourceManager & getSourceManager()
Definition: ASTContext.h:721
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2825
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2644
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
Definition: ASTContext.h:1146
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition: ASTContext.h:797
CanQualType BoolTy
Definition: ASTContext.h:1120
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType VoidTy
Definition: ASTContext.h:1119
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2828
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:779
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
Definition: RecordLayout.h:210
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:5086
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:3566
Attr - This represents one attribute.
Definition: Attr.h:42
Represents a C++ constructor within a class.
Definition: DeclCXX.h:2539
Represents a C++ destructor within a class.
Definition: DeclCXX.h:2803
Represents a static or instance method of a struct/union/class.
Definition: DeclCXX.h:2064
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition: DeclCXX.h:2190
QualType getFunctionObjectParameterType() const
Definition: DeclCXX.h:2214
Represents a C++ struct/union/class.
Definition: DeclCXX.h:258
base_class_range bases()
Definition: DeclCXX.h:620
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition: DeclCXX.h:1023
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition: DeclCXX.cpp:1680
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition: DeclCXX.h:614
base_class_range vbases()
Definition: DeclCXX.h:637
capture_const_range captures() const
Definition: DeclCXX.h:1102
ctor_range ctors() const
Definition: DeclCXX.h:682
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition: DeclCXX.cpp:2014
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Definition: Redeclarable.h:350
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:3775
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:3809
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1305
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:3815
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition: Stmt.h:3803
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:3806
This captures a statement into a function.
Definition: Stmt.h:3762
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition: Stmt.h:3913
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition: Stmt.h:3883
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:3866
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition: Stmt.cpp:1431
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition: Stmt.h:3908
capture_range captures()
Definition: Stmt.h:3900
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition: CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition: CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition: CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition: Address.h:128
static Address invalid()
Definition: Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition: Address.h:251
CharUnits getAlignment() const
Definition: Address.h:189
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:207
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition: Address.h:259
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition: Address.h:274
Address withAlignment(CharUnits NewAlignment) const
Return address with different alignment, but same pointer and element type.
Definition: Address.h:267
bool isValid() const
Definition: Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:199
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:895
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:902
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:912
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:156
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:135
llvm::Value * CreateIsNull(Address Addr, const Twine &Name="")
Definition: CGBuilder.h:354
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition: CGBuilder.h:291
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition: CGBuilder.h:202
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition: CGBuilder.h:240
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:107
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:363
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition: CGBuilder.h:277
MangleContext & getMangleContext()
Gets the mangle context.
Definition: CGCXXABI.h:113
CGFunctionInfo - Class to encapsulate the information about a function definition.
Manages list of lastprivate conditional decls for the specified directive.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::FunctionType * Kmpc_MicroTy
The type for a microtask which gets passed to __kmpc_fork_call().
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
void emitUDMapperArrayInitOrDel(CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *BasePtr, llvm::Value *Ptr, llvm::Value *Size, llvm::Value *MapType, llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit)
Emit the array initialization or deletion portion for user-defined mapper code generation.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits Callee function call with arguments Args with location Loc.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /**< pointer to block of pointers to shared vars */ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Helper to determine the min/max number of threads/teams for D.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
llvm::SmallVector< NontemporalDeclsSet, 4 > NontemporalDeclsStack
Stack for list of declarations in current context marked as nontemporal.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit a code for reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args=std::nullopt) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before the start of the loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address (of void * type) of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before the start of the loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
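The truncated signature above, written out for the 32-bit signed case (a sketch; ident_t and kmp_int32 are the runtime's types, and the _8/_4u/_8u variants cover the other induction-variable widths and signedness):
  kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 tid,
                                   kmp_int32 *p_lastiter, kmp_int32 *p_lower,
                                   kmp_int32 *p_upper, kmp_int32 *p_stride);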
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for the initialization of a threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit code for the reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record whose address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for the initialization of the task reduction clause.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record whose ad...
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
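The runtime entry from the description, as a plain declaration (ident_t and kmp_int32 are the runtime's types):
  void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind);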
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit the outlined function for the 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
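Completing the truncated signature, the declaration is assumed to be:
  void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_teams, kmp_int32 num_threads);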
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
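Written out in full:
  void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                               kmp_int32 num_threads);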
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
CGRecordLayout - This class handles struct and union layout info while lowering AST types to LLVM typ...
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
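A minimal usage sketch, assuming CGF is the current CodeGenFunction and that DestAddr, SrcAddr, the array type ArrayTy, and its element type ElemTy are already in scope (all hypothetical names):
  CGF.EmitOMPAggregateAssign(
      DestAddr, SrcAddr, ArrayTy,
      [&CGF, ElemTy](Address DestElem, Address SrcElem) {
        // Called once per element; copy it as an aggregate.
        CGF.EmitAggregateAssign(CGF.MakeAddrLValue(DestElem, ElemTy),
                                CGF.MakeAddrLValue(SrcElem, ElemTy), ElemTy);
      });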
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
CGCapturedStmtInfo * CapturedStmtInfo
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
void EmitVariablyModifiedType(QualType Ty)
EmitVariablyModifiedType - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
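Together with EmitBlock, EmitBranch, and EmitBranchOnBoolExpr (see the nearby entries), this supports the usual if-guard emission pattern; a sketch, assuming CGF is the current CodeGenFunction and IfCond an Expr * condition:
  llvm::BasicBlock *ThenBB = CGF.createBasicBlock("omp.if.then");
  llvm::BasicBlock *ContBB = CGF.createBasicBlock("omp.if.end");
  CGF.EmitBranchOnBoolExpr(IfCond, ThenBB, ContBB, /*TrueCount=*/0);
  CGF.EmitBlock(ThenBB);
  // ... emit the guarded region ...
  CGF.EmitBranch(ContBB);
  CGF.EmitBlock(ContBB, /*IsFinished=*/true);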
const LangOptions & getLangOpts() const
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
const CodeGen::CGBlockInfo * BlockInfo
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
llvm::Type * ConvertTypeForMem(QualType T)
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
JumpDest ReturnBlock
ReturnBlock - Unified return block.
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
void EmitAutoVarCleanups(const AutoVarEmission &emission)
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertType(QualType T)
CodeGenTypes & getTypes() const
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
LValue EmitStringLiteralLValue(const StringLiteral *E)
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc)
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
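Paired with EmitLoadOfScalar above, this gives a type-aware scalar copy; a sketch, assuming CGF, SrcAddr, DestAddr, the QualType Ty, and a SourceLocation Loc are in scope:
  llvm::Value *V = CGF.EmitLoadOfScalar(SrcAddr, /*Volatile=*/false, Ty, Loc);
  CGF.EmitStoreOfScalar(V, DestAddr, /*Volatile=*/false, Ty);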
This class organizes the cross-function state that is used while generating LLVM code.
void handleCUDALaunchBoundsAttr(llvm::Function *F, const CUDALaunchBoundsAttr *A, int32_t *MaxThreadsVal=nullptr, int32_t *MinBlocksVal=nullptr, int32_t *MaxClusterRankVal=nullptr)
Emit the IR encoding to attach the CUDA launch bounds attribute to F.
Definition: NVPTX.cpp:314
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void handleAMDGPUWavesPerEUAttr(llvm::Function *F, const AMDGPUWavesPerEUAttr *A)
Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to F.
Definition: AMDGPU.cpp:690
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
const TargetInfo & getTarget() const
void EmitGlobal(GlobalDecl D)
Emit code for a single global function or var decl.
void handleAMDGPUFlatWorkGroupSizeAttr(llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A, const ReqdWorkGroupSizeAttr *ReqdWGS=nullptr, int32_t *MinThreadsVal=nullptr, int32_t *MaxThreadsVal=nullptr)
Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute to F.
Definition: AMDGPU.cpp:663
llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD)
Returns LLVM linkage for a declarator.
CGCXXABI & getCXXABI() const
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
const llvm::Triple & getTriple() const
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
llvm::Constant * GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition=NotForDefinition)
ASTContext & getContext() const
const TargetCodeGenInfo & getTargetCodeGenInfo()
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition: CGDecl.cpp:2821
llvm::GlobalValue * GetGlobalValue(StringRef Ref)
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Function * CreateGlobalInitOrCleanUpFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false, llvm::GlobalVariable::LinkageTypes Linkage=llvm::GlobalVariable::InternalLinkage)
Definition: CGDeclCXX.cpp:443
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1607
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:680
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:722
A specialization of Address that requires the address to be an LLVM Constant.
Definition: Address.h:294
static ConstantAddress invalid()
Definition: Address.h:302
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:141
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:631
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:243
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition: CGCall.h:368
LValue - This represents an lvalue reference.
Definition: CGValue.h:182
CharUnits getAlignment() const
Definition: CGValue.h:343
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition: CGValue.h:338
Address getAddress() const
Definition: CGValue.h:361
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:346
QualType getType() const
Definition: CGValue.h:291
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:335
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition: CGValue.h:42
static RValue get(llvm::Value *V)
Definition: CGValue.h:98
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:71
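A sketch of the scalar round trip, assuming CGF is a CodeGenFunction and E a scalar-typed Expr *:
  llvm::Value *V = CGF.EmitScalarExpr(E);
  RValue RV = RValue::get(V);
  llvm::Value *Same = RV.getScalarVal(); // returns V back for scalar rvalues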
An abstract representation of an aligned address.
Definition: Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:77
llvm::Value * getPointer() const
Definition: Address.h:66
static RawAddress invalid()
Definition: Address.h:61
bool isValid() const
Definition: Address.h:62
Class intended to support codegen of all kind of the reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the reference expression for the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variably-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
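A sketch of driving the class over all reduction items, assuming CGF and the four clause arrays from the constructor are available:
  ReductionCodeGen RCG(Shareds, Origs, Privates, ReductionOps);
  for (unsigned N = 0, E = Privates.size(); N < E; ++N) {
    RCG.emitSharedOrigLValue(CGF, N); // lvalues for shared/original items
    RCG.emitAggregateType(CGF, N);    // size info for variably-modified types
  }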
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used in place of the original variable address in normal operations.
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
virtual void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const
setTargetAttributes - Provides a convenient hook to handle extra target-specific attributes for the g...
Definition: TargetInfo.h:76
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
Definition: StmtVisitor.h:195
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1436
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1766
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1265
ValueDecl * getDecl()
Definition: Expr.h:1333
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:86
SourceLocation getEndLoc() const LLVM_READONLY
Definition: DeclBase.h:442
T * getAttr() const
Definition: DeclBase.h:580
bool hasAttrs() const
Definition: DeclBase.h:525
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:523
void addAttr(Attr *A)
Definition: DeclBase.cpp:1013
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
Definition: DeclBase.h:1083
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:566
SourceLocation getLocation() const
Definition: DeclBase.h:446
DeclContext * getDeclContext()
Definition: DeclBase.h:455
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: DeclBase.h:438
AttrVec & getAttrs()
Definition: DeclBase.h:531
bool hasAttr() const
Definition: DeclBase.h:584
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:968
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:783
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1547
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition: Diagnostic.h:873
The return type of classify().
Definition: Expr.h:330
This represents one expression.
Definition: Expr.h:110
bool isGLValue() const
Definition: Expr.h:280
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition: Expr.h:671
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition: Expr.h:669
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:3075
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:3070
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3567
bool isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:277
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc=nullptr) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
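A typical fold of a clause expression to a compile-time constant; a sketch, assuming E is an Expr * and Ctx the ASTContext:
  if (std::optional<llvm::APSInt> CV = E->getIntegerConstantExpr(Ctx)) {
    uint64_t N = CV->getZExtValue(); // the expression's constant value
    (void)N; // ... emit a constant instead of a runtime evaluation ...
  }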
QualType getType() const
Definition: Expr.h:142
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition: Expr.cpp:3929
Represents a member of a struct/union/class.
Definition: Decl.h:3030
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:4531
Represents a function declaration or definition.
Definition: Decl.h:1932
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2669
QualType getReturnType() const
Definition: Decl.h:2717
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2646
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:3603
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3678
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:56
const Decl * getDecl() const
Definition: GlobalDecl.h:103
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:5378
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition: Expr.cpp:977
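A sketch of synthesizing the literal 0 of type int, assuming Ctx is the ASTContext and Loc a SourceLocation:
  llvm::APInt Zero(Ctx.getIntWidth(Ctx.IntTy), 0);
  Expr *Lit = IntegerLiteral::Create(Ctx, Zero, Ctx.IntTy, Loc);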
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition: LambdaCapture.h:25
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:539
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:535
virtual void mangleCanonicalTypeName(QualType T, raw_ostream &, bool NormalizeIntegers=false)=0
Generates a unique string for an externally visible type for use with TBAA or type uniquing.
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:3187
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition: Expr.h:3270
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:276
bool isExternallyVisible() const
Definition: Decl.h:408
This represents clause 'affinity' in the '#pragma omp task'-based directives.
Class that represents a component of a mappable expression.
ArrayRef< MappableComponent > MappableExprComponentListRef
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
Definition: OpenMPClause.h:219
This is a basic class for representing single OpenMP clause.
Definition: OpenMPClause.h:55
This represents '#pragma omp declare mapper ...' directive.
Definition: DeclOpenMP.h:287
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:177
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:238
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:266
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
Definition: StmtOpenMP.h:446
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
This represents clause 'has_device_ptr' in the '#pragma omp ...' directives.
This represents 'if' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:587
Expr * getCondition() const
Returns condition.
Definition: OpenMPClause.h:656
This represents clause 'in_reduction' in the '#pragma omp task' directives.
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
OpenMP 5.0 [2.1.6 Iterators] Iterators are identifiers that expand to multiple values in the clause o...
Definition: ExprOpenMP.h:151
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents clause 'linear' in the '#pragma omp ...' directives.
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc....
Definition: StmtOpenMP.h:1004
This represents clause 'map' in the '#pragma omp ...' directives.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:736
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents clause 'reduction' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition: DeclOpenMP.h:417
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition: Expr.h:1173
Represents a parameter to a function.
Definition: Decl.h:1722
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:3187
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition: Type.h:941
void addRestrict()
Add the restrict qualifier to this QualType.
Definition: Type.h:1179
QualType withRestrict() const
Definition: Type.h:1182
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:1008
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:7750
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:7790
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:7951
QualType getCanonicalType() const
Definition: Type.h:7802
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition: Type.h:1542
Represents a struct/union/class.
Definition: Decl.h:4145
field_iterator field_end() const
Definition: Decl.h:4354
field_range fields() const
Definition: Decl.h:4351
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition: Decl.cpp:5080
bool field_empty() const
Definition: Decl.h:4359
field_iterator field_begin() const
Definition: Decl.cpp:5068
RecordDecl * getDecl() const
Definition: Type.h:5975
decl_type * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
Definition: Redeclarable.h:205
decl_type * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
Definition: Redeclarable.h:227
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:3428
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
fileinfo_iterator fileinfo_end() const
SourceLocation translateFileLineCol(const FileEntry *SourceFile, unsigned Line, unsigned Col) const
Get the source location for the given file:line:col triplet.
fileinfo_iterator fileinfo_begin() const
A trivial tuple used to represent a source range.
Stmt - This represents one statement.
Definition: Stmt.h:84
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:326
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition: Stmt.cpp:197
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:338
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:4736
bool isUnion() const
Definition: Decl.h:3767
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:1576
virtual bool hasFeature(StringRef Feature) const
Determine whether the given target has the given feature.
Definition: TargetInfo.h:1487
The base class of the type hierarchy.
Definition: Type.h:1829
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1882
bool isVoidType() const
Definition: Type.h:8319
bool isSignedIntegerOrEnumerationType() const
Determines whether this is an integer type that is signed or an enumeration type whose underlying ty...
Definition: Type.cpp:2167
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition: Type.h:8497
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition: Type.cpp:2146
bool isArrayType() const
Definition: Type.h:8075
bool isPointerType() const
Definition: Type.h:8003
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:8359
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:8607
bool isReferenceType() const
Definition: Type.h:8021
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition: Type.cpp:705
bool isLValueReferenceType() const
Definition: Type.h:8025
QualType getCanonicalTypeInternal() const
Definition: Type.h:2978
const RecordType * getAsStructureType() const
Definition: Type.cpp:721
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:8490
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:2713
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition: Type.h:8593
bool isFloatingType() const
Definition: Type.cpp:2249
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:2196
bool isAnyPointerType() const
Definition: Type.h:8011
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:8540
bool isRecordType() const
Definition: Type.h:8103
bool isUnionType() const
Definition: Type.cpp:671
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1890
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition: Type.cpp:1886
Represents the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition: Decl.h:667
QualType getType() const
Definition: Decl.h:678
Represents a variable declaration or definition.
Definition: Decl.h:879
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2239
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2348
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition: Decl.h:1165
bool hasLocalStorage() const
Returns true if a variable with function scope is a non-static local variable.
Definition: Decl.h:1132
@ DeclarationOnly
This declaration is only a declaration.
Definition: Decl.h:1243
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2357
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1210
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition: Decl.h:1306
Represents a C array with a specified size that is not an integer-constant-expression.
Definition: Type.h:3795
Expr * getSizeExpr() const
Definition: Type.h:3814
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:33
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
@ NotKnownNonNull
Definition: Address.h:33
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition: ABI.h:25
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:24
@ ICIS_NoInit
No in-class initializer.
Definition: Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition: Lambda.h:37
BinaryOperatorKind
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:38
@ OMPC_SCHEDULE_MODIFIER_last
Definition: OpenMPKinds.h:43
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition: OpenMPKinds.h:39
@ CR_OpenMP
Definition: CapturedStmt.h:19
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:103
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition: OpenMPKinds.h:54
@ OMPC_DEPEND_unknown
Definition: OpenMPKinds.h:58
@ Dtor_Complete
Complete object dtor.
Definition: ABI.h:35
@ Union
The "union" keyword.
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
LangAS
Defines the address space values used by the address space qualifier of QualType.
Definition: AddressSpaces.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:139
const FunctionProtoType * T
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
Definition: OpenMPKinds.h:131
@ OMPC_DEVICE_unknown
Definition: OpenMPKinds.h:50
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition: OpenMPKinds.h:78
@ OMPC_MAP_MODIFIER_unknown
Definition: OpenMPKinds.h:79
@ Other
Other implicit parameter.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:30
@ OMPC_SCHEDULE_unknown
Definition: OpenMPKinds.h:34
@ AS_public
Definition: Specifiers.h:124
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:70
@ OMPC_MAP_unknown
Definition: OpenMPKinds.h:74
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition: Dominators.h:30
#define false
Definition: stdbool.h:26
#define bool
Definition: stdbool.h:24
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, needed to generate the stati...
bool Ordered
True if the loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
llvm::IntegerType * IntTy
int
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:642
Extra information about a function prototype.
Definition: Type.h:5087
Helper expressions and declaration for OMPIteratorExpr class for each iteration space.
Definition: ExprOpenMP.h:111
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition: ExprOpenMP.h:121
Expr * Update
Update expression for the originally specified iteration variable, calculated as VD = Begin + Counter...
Definition: ExprOpenMP.h:119
VarDecl * CounterVD
Internal normalized counter.
Definition: ExprOpenMP.h:113
Data for list of allocators.
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:179
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:182
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:181
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:180
Describes how types, statements, expressions, and declarations should be printed.
Definition: PrettyPrinter.h:57