//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like the 'for', 'sections', 'atomic', etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
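  // Illustrative sketch, not from the original source: for an untied task
  // whose body passes through emitUntiedSwitch() twice, the action above
  // produces a dispatch roughly equivalent to
  //
  //   switch (*part_id) {                // emitted by Enter()
  //   default: break;                    // .untied.done. -> exit via cleanups
  //   case 0: goto part0;
  //   case 1: goto part1;
  //   case 2: goto part2;
  //   }
  //   part0: ...; *part_id = 1; return;  // first re-schedule point
  //   part1: ...; *part_id = 2; return;  // second re-schedule point
  //   part2: ...;                        // final part
  //
  // so re-invoking the task entry resumes right after the last completed part.
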
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
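
// Illustrative note (not in the original source): these values combine
// bitwise. For example, the implicit barrier emitted at the end of a
// worksharing 'for' with a C-style ident would be encoded as
//
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR  // 0x02 | 0x40 == 0x42
//
// Note that OMP_IDENT_BARRIER_IMPL_SECTIONS (0xC0) and
// OMP_IDENT_BARRIER_IMPL_SINGLE (0x140) are multi-bit patterns that include
// the OMP_IDENT_BARRIER_IMPL bit (0x40); they distinguish the barrier kind
// rather than acting as independent single-bit flags.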

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
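
// For reference (illustrative, not in the original source):
// getIdentStringFromSourceLocation() below fills IdentField_PSource with a
// string of the form ";file;function;line;column;;". A directive at line 12,
// column 9 of main.c inside foo() would yield ";main.c;foo;12;9;;" (the names
// here are hypothetical).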

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
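
// Illustrative note (not in the original source): the two modifier bits are
// OR'ed into the base schedule value, so e.g. 'schedule(nonmonotonic:
// dynamic)' is passed to the runtime as
//
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic // 35 | (1 << 30)
//
// and the runtime masks the high modifier bits off to recover the base
// schedule kind.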

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
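
// Sketch of the control flow emitted by EmitOMPAggregateInit above
// (illustrative only, using the block names created in the function):
//
//   entry:               isempty = (dest.begin == dest.end)
//                        br isempty, omp.arrayinit.done, omp.arrayinit.body
//   omp.arrayinit.body:  cur = phi [dest.begin, entry], [next, body]
//                        <emit element init into cur>
//                        next = cur + 1
//                        br (next == dest.end), omp.arrayinit.done, body
//   omp.arrayinit.done:  <continue>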

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.EmitStmt(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  Config.setDefaultTargetAS(
      CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));

  OMPBuilder.setConfig(Config);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
                                     CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
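
// For context (hypothetical user code, not part of this file), a declaration
// that exercises the helper above:
//
//   #pragma omp declare reduction(mymax : int :                              \
//       omp_out = omp_in > omp_out ? omp_in : omp_out)                       \
//       initializer(omp_priv = INT_MIN)
//
// The combiner expression is emitted into an '.omp_combiner.' function with
// 'omp_in'/'omp_out' mapped onto the two pointer parameters, and the
// initializer clause into an '.omp_initializer.' function (the exact names
// are produced by getName() and are platform-specific).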

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
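
// Example (illustrative): for a construct at line 10, column 3 of test.c
// inside function 'main', the function above produces ";test.c;main;10;3;;".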

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
    for (const auto *D : I->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(I);
  }
  if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
    break;
  }
}

static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
    break;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    if (!CGM.getFileSystem()->exists(PLoc.getFilename()))
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack,
                                             *CGM.getFileSystem(), ParentName);
}
1554
1555ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1556 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1557
1558 auto LinkageForVariable = [&VD, this]() {
1559 return CGM.getLLVMLinkageVarDefinition(VD);
1560 };
1561
1562 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1563
1564 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1565 CGM.getContext().getPointerType(VD->getType()));
1566 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1567      convertCaptureClause(VD), convertDeviceClause(VD),
1568 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1569 VD->isExternallyVisible(),
1570      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1571 VD->getCanonicalDecl()->getBeginLoc()),
1572 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1573 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1574 LinkageForVariable);
1575
1576 if (!addr)
1577 return ConstantAddress::invalid();
1578 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1579}
1580
1581llvm::Constant *
1582CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1583 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1584 !CGM.getContext().getTargetInfo().isTLSSupported());
1585 // Lookup the entry, lazily creating it if necessary.
1586 std::string Suffix = getName({"cache", ""});
1587 return OMPBuilder.getOrCreateInternalVariable(
1588 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1589}
1590
1591Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1592 const VarDecl *VD,
1593 Address VDAddr,
1594 SourceLocation Loc) {
1595 if (CGM.getLangOpts().OpenMPUseTLS &&
1596 CGM.getContext().getTargetInfo().isTLSSupported())
1597 return VDAddr;
1598
1599 llvm::Type *VarTy = VDAddr.getElementType();
1600 llvm::Value *Args[] = {
1601 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1602 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1603 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1604      getOrCreateThreadPrivateCache(VD)};
1605 return Address(
1606 CGF.EmitRuntimeCall(
1607 OMPBuilder.getOrCreateRuntimeFunction(
1608 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1609 Args),
1610 CGF.Int8Ty, VDAddr.getAlignment());
1611}
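// The runtime entry point used above has the shape
//   void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 gtid,
//                                     void *data, size_t size, void ***cache);
// and returns the address of the calling thread's copy of the variable.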
1612
1613void CGOpenMPRuntime::emitThreadPrivateVarInit(
1614 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1615 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1616 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1617 // library.
1618 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1619 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1620 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1621 OMPLoc);
1622 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1623 // to register constructor/destructor for variable.
1624 llvm::Value *Args[] = {
1625 OMPLoc,
1626 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1627 Ctor, CopyCtor, Dtor};
1628 CGF.EmitRuntimeCall(
1629 OMPBuilder.getOrCreateRuntimeFunction(
1630 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1631 Args);
1632}
1633
1634llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1635 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1636 bool PerformInit, CodeGenFunction *CGF) {
1637 if (CGM.getLangOpts().OpenMPUseTLS &&
1638 CGM.getContext().getTargetInfo().isTLSSupported())
1639 return nullptr;
1640
1641 VD = VD->getDefinition(CGM.getContext());
1642 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1643 QualType ASTTy = VD->getType();
1644
1645 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1646 const Expr *Init = VD->getAnyInitializer();
1647 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1648 // Generate function that re-emits the declaration's initializer into the
1649 // threadprivate copy of the variable VD
1650 CodeGenFunction CtorCGF(CGM);
1651 FunctionArgList Args;
1652 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1653 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1654                            ImplicitParamKind::Other);
1655 Args.push_back(&Dst);
1656
1657 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1658 CGM.getContext().VoidPtrTy, Args);
1659 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1660 std::string Name = getName({"__kmpc_global_ctor_", ""});
1661 llvm::Function *Fn =
1662 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1663 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1664 Args, Loc, Loc);
1665 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1666 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1667 CGM.getContext().VoidPtrTy, Dst.getLocation());
1668 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1669 VDAddr.getAlignment());
1670 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1671 /*IsInitializer=*/true);
1672 ArgVal = CtorCGF.EmitLoadOfScalar(
1673 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1674 CGM.getContext().VoidPtrTy, Dst.getLocation());
1675 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1676 CtorCGF.FinishFunction();
1677 Ctor = Fn;
1678 }
1679    if (VD->getType().isDestructedType() != QualType::DK_none) {
1680 // Generate function that emits destructor call for the threadprivate copy
1681 // of the variable VD
1682 CodeGenFunction DtorCGF(CGM);
1683 FunctionArgList Args;
1684 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1685 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1686                            ImplicitParamKind::Other);
1687 Args.push_back(&Dst);
1688
1689 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1690 CGM.getContext().VoidTy, Args);
1691 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1692 std::string Name = getName({"__kmpc_global_dtor_", ""});
1693 llvm::Function *Fn =
1694 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1695 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1696 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1697 Loc, Loc);
1698 // Create a scope with an artificial location for the body of this function.
1699 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1700 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1701 DtorCGF.GetAddrOfLocalVar(&Dst),
1702 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1703 DtorCGF.emitDestroy(
1704 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1705 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1706 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1707 DtorCGF.FinishFunction();
1708 Dtor = Fn;
1709 }
1710 // Do not emit init function if it is not required.
1711 if (!Ctor && !Dtor)
1712 return nullptr;
1713
1714 // Copying constructor for the threadprivate variable.
1715  // Must be NULL: the parameter is reserved by the runtime, which currently
1716  // requires it to always be NULL and asserts otherwise.
1717 CopyCtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1718 if (Ctor == nullptr) {
1719 Ctor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1720 }
1721 if (Dtor == nullptr) {
1722 Dtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1723 }
1724 if (!CGF) {
1725 auto *InitFunctionTy =
1726 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1727 std::string Name = getName({"__omp_threadprivate_init_", ""});
1728 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1729 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1730 CodeGenFunction InitCGF(CGM);
1731 FunctionArgList ArgList;
1732 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1733 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1734 Loc, Loc);
1735 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1736 InitCGF.FinishFunction();
1737 return InitFunction;
1738 }
1739 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1740 }
1741 return nullptr;
1742}
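// Sketch of the effect for a hypothetical C++ threadprivate variable:
//   int x = init();
//   #pragma omp threadprivate(x)
// emits a global ctor/dtor helper pair (as needed) and registers it via
// __kmpc_threadprivate_register, either from the caller's CGF or from a
// synthesized __omp_threadprivate_init_ function, unless TLS is used.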
1743
1744void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1745 llvm::GlobalValue *GV) {
1746 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1747 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1748
1749 // We only need to handle active 'indirect' declare target functions.
1750 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1751 return;
1752
1753 // Get a mangled name to store the new device global in.
1754 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1755      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1756 SmallString<128> Name;
1757 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1758
1759 // We need to generate a new global to hold the address of the indirectly
1760 // called device function. Doing this allows us to keep the visibility and
1761 // linkage of the associated function unchanged while allowing the runtime to
1762 // access its value.
1763 llvm::GlobalValue *Addr = GV;
1764 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1765 llvm::PointerType *FnPtrTy = llvm::PointerType::get(
1766 CGM.getLLVMContext(),
1767 CGM.getModule().getDataLayout().getProgramAddressSpace());
1768 Addr = new llvm::GlobalVariable(
1769 CGM.getModule(), FnPtrTy,
1770 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1771 nullptr, llvm::GlobalValue::NotThreadLocal,
1772 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1773 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1774 }
1775
1776 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1777 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1778 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1779 llvm::GlobalValue::WeakODRLinkage);
1780}
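// For example, a function in '#pragma omp declare target indirect' gets a
// companion device global (protected visibility, registered with weak_odr
// linkage) holding its address, so the runtime can resolve indirect calls
// without altering the function's own linkage or visibility.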
1781
1782Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1783 QualType VarType,
1784 StringRef Name) {
1785 std::string Suffix = getName({"artificial", ""});
1786 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1787 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1788 VarLVType, Twine(Name).concat(Suffix).str());
1789 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1790 CGM.getTarget().isTLSSupported()) {
1791 GAddr->setThreadLocal(/*Val=*/true);
1792 return Address(GAddr, GAddr->getValueType(),
1793 CGM.getContext().getTypeAlignInChars(VarType));
1794 }
1795 std::string CacheSuffix = getName({"cache", ""});
1796 llvm::Value *Args[] = {
1797      emitUpdateLocation(CGF, SourceLocation()),
1798      getThreadID(CGF, SourceLocation()),
1799 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1800 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1801 /*isSigned=*/false),
1802 OMPBuilder.getOrCreateInternalVariable(
1803 CGM.VoidPtrPtrTy,
1804 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1805 return Address(
1806      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1807 CGF.EmitRuntimeCall(
1808 OMPBuilder.getOrCreateRuntimeFunction(
1809 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1810 Args),
1811 CGF.Builder.getPtrTy(0)),
1812 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1813}
1814
1815void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1816 const RegionCodeGenTy &ThenGen,
1817 const RegionCodeGenTy &ElseGen) {
1818 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1819
1820 // If the condition constant folds and can be elided, try to avoid emitting
1821 // the condition and the dead arm of the if/else.
1822 bool CondConstant;
1823 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1824 if (CondConstant)
1825 ThenGen(CGF);
1826 else
1827 ElseGen(CGF);
1828 return;
1829 }
1830
1831 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1832 // emit the conditional branch.
1833 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1834 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1835 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1836 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1837
1838 // Emit the 'then' code.
1839 CGF.EmitBlock(ThenBlock);
1840 ThenGen(CGF);
1841 CGF.EmitBranch(ContBlock);
1842 // Emit the 'else' code if present.
1843 // There is no need to emit line number for unconditional branch.
1844  (void)ApplyDebugLocation::CreateEmpty(CGF);
1845 CGF.EmitBlock(ElseBlock);
1846 ElseGen(CGF);
1847 // There is no need to emit line number for unconditional branch.
1848  (void)ApplyDebugLocation::CreateEmpty(CGF);
1849 CGF.EmitBranch(ContBlock);
1850 // Emit the continuation block for code after the if.
1851 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1852}
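// For a non-constant condition this produces the usual diamond, roughly:
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// with both arms falling through to omp_if.end.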
1853
1854void CGOpenMPRuntime::emitParallelCall(
1855 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1856 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1857 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1858 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1859 if (!CGF.HaveInsertPoint())
1860 return;
1861 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1862 auto &M = CGM.getModule();
1863 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1864 this](CodeGenFunction &CGF, PrePostActionTy &) {
1865 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1866 llvm::Value *Args[] = {
1867 RTLoc,
1868 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1869 OutlinedFn};
1870    llvm::SmallVector<llvm::Value *, 16> RealArgs;
1871 RealArgs.append(std::begin(Args), std::end(Args));
1872 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1873
1874 llvm::FunctionCallee RTLFn =
1875 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1876 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1877 };
1878 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1879 this](CodeGenFunction &CGF, PrePostActionTy &) {
1880    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1881 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1882 // Build calls:
1883 // __kmpc_serialized_parallel(&Loc, GTid);
1884 llvm::Value *Args[] = {RTLoc, ThreadID};
1885 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1886 M, OMPRTL___kmpc_serialized_parallel),
1887 Args);
1888
1889 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1890 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1891 RawAddress ZeroAddrBound =
1892        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1893 /*Name=*/".bound.zero.addr");
1894 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1895    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1896 // ThreadId for serialized parallels is 0.
1897 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1898 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1899 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1900
1901 // Ensure we do not inline the function. This is trivially true for the ones
1902 // passed to __kmpc_fork_call but the ones called in serialized regions
1903    // could be inlined. This is not perfect, but it is closer to the invariant
1904 // we want, namely, every data environment starts with a new function.
1905 // TODO: We should pass the if condition to the runtime function and do the
1906 // handling there. Much cleaner code.
1907 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1908 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1909 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1910
1911 // __kmpc_end_serialized_parallel(&Loc, GTid);
1912 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1913 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1914 M, OMPRTL___kmpc_end_serialized_parallel),
1915 EndArgs);
1916 };
1917 if (IfCond) {
1918 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1919 } else {
1920 RegionCodeGenTy ThenRCG(ThenGen);
1921 ThenRCG(CGF);
1922 }
1923}
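// Rough shape of the lowering for '#pragma omp parallel if(cond)':
//   if (cond)
//     __kmpc_fork_call(&loc, n, outlined, captured...);
//   else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined(&gtid, &.bound.zero.addr, captured...);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }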
1924
1925// If we're inside an (outlined) parallel region, use the region info's
1926// thread-ID variable (it is passed as the first argument of the outlined
1927// function, as "kmp_int32 *gtid"). Otherwise, in a regular serial code
1928// region, get the thread ID by calling kmp_int32
1929// __kmpc_global_thread_num(ident_t *loc), stash it in a temporary, and
1930// return the address of that temporary.
1931Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1932 SourceLocation Loc) {
1933 if (auto *OMPRegionInfo =
1934 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1935 if (OMPRegionInfo->getThreadIDVariable())
1936 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1937
1938 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1939 QualType Int32Ty =
1940 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1941 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1942 CGF.EmitStoreOfScalar(ThreadID,
1943 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1944
1945 return ThreadIDTemp;
1946}
1947
1948llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1949 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1950 std::string Name = getName({Prefix, "var"});
1951 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1952}
1953
1954namespace {
1955/// Common pre(post)-action for different OpenMP constructs.
1956class CommonActionTy final : public PrePostActionTy {
1957 llvm::FunctionCallee EnterCallee;
1958 ArrayRef<llvm::Value *> EnterArgs;
1959 llvm::FunctionCallee ExitCallee;
1960 ArrayRef<llvm::Value *> ExitArgs;
1961 bool Conditional;
1962 llvm::BasicBlock *ContBlock = nullptr;
1963
1964public:
1965 CommonActionTy(llvm::FunctionCallee EnterCallee,
1966 ArrayRef<llvm::Value *> EnterArgs,
1967 llvm::FunctionCallee ExitCallee,
1968 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1969 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1970 ExitArgs(ExitArgs), Conditional(Conditional) {}
1971 void Enter(CodeGenFunction &CGF) override {
1972 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1973 if (Conditional) {
1974 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1975 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1976 ContBlock = CGF.createBasicBlock("omp_if.end");
1977 // Generate the branch (If-stmt)
1978 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1979 CGF.EmitBlock(ThenBlock);
1980 }
1981 }
1982 void Done(CodeGenFunction &CGF) {
1983 // Emit the rest of blocks/branches
1984 CGF.EmitBranch(ContBlock);
1985 CGF.EmitBlock(ContBlock, true);
1986 }
1987 void Exit(CodeGenFunction &CGF) override {
1988 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1989 }
1990};
1991} // anonymous namespace
1992
1993void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1994 StringRef CriticalName,
1995 const RegionCodeGenTy &CriticalOpGen,
1996 SourceLocation Loc, const Expr *Hint) {
1997 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1998 // CriticalOpGen();
1999 // __kmpc_end_critical(ident_t *, gtid, Lock);
2000 // Prepare arguments and build a call to __kmpc_critical
2001 if (!CGF.HaveInsertPoint())
2002 return;
2003 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2004 getCriticalRegionLock(CriticalName)};
2005 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2006 std::end(Args));
2007 if (Hint) {
2008 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2009 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2010 }
2011 CommonActionTy Action(
2012 OMPBuilder.getOrCreateRuntimeFunction(
2013 CGM.getModule(),
2014 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2015 EnterArgs,
2016 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2017 OMPRTL___kmpc_end_critical),
2018 Args);
2019 CriticalOpGen.setAction(Action);
2020 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2021}
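// For example, '#pragma omp critical (name) hint(h)' lowers roughly to
//   __kmpc_critical_with_hint(&loc, gtid, &lock, h);
//   <critical body>
//   __kmpc_end_critical(&loc, gtid, &lock);
// where 'lock' is the gomp_critical_user_<name>.var internal variable
// produced by getCriticalRegionLock above.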
2022
2023void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2024 const RegionCodeGenTy &MasterOpGen,
2025 SourceLocation Loc) {
2026 if (!CGF.HaveInsertPoint())
2027 return;
2028 // if(__kmpc_master(ident_t *, gtid)) {
2029 // MasterOpGen();
2030 // __kmpc_end_master(ident_t *, gtid);
2031 // }
2032 // Prepare arguments and build a call to __kmpc_master
2033 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2034 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2035 CGM.getModule(), OMPRTL___kmpc_master),
2036 Args,
2037 OMPBuilder.getOrCreateRuntimeFunction(
2038 CGM.getModule(), OMPRTL___kmpc_end_master),
2039 Args,
2040 /*Conditional=*/true);
2041 MasterOpGen.setAction(Action);
2042 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2043 Action.Done(CGF);
2044}
2045
2046void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2047 const RegionCodeGenTy &MaskedOpGen,
2048 SourceLocation Loc, const Expr *Filter) {
2049 if (!CGF.HaveInsertPoint())
2050 return;
2051 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2052 // MaskedOpGen();
2053  // __kmpc_end_masked(ident_t *, gtid);
2054 // }
2055 // Prepare arguments and build a call to __kmpc_masked
2056 llvm::Value *FilterVal = Filter
2057 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2058 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2059 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2060 FilterVal};
2061 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2062 getThreadID(CGF, Loc)};
2063 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2064 CGM.getModule(), OMPRTL___kmpc_masked),
2065 Args,
2066 OMPBuilder.getOrCreateRuntimeFunction(
2067 CGM.getModule(), OMPRTL___kmpc_end_masked),
2068 ArgsEnd,
2069 /*Conditional=*/true);
2070 MaskedOpGen.setAction(Action);
2071 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2072 Action.Done(CGF);
2073}
2074
2075void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2076 SourceLocation Loc) {
2077 if (!CGF.HaveInsertPoint())
2078 return;
2079 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2080 OMPBuilder.createTaskyield(CGF.Builder);
2081 } else {
2082 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2083 llvm::Value *Args[] = {
2084 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2085 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2086 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2087 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2088 Args);
2089 }
2090
2091 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2092 Region->emitUntiedSwitch(CGF);
2093}
2094
2095void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2096 const RegionCodeGenTy &TaskgroupOpGen,
2097 SourceLocation Loc) {
2098 if (!CGF.HaveInsertPoint())
2099 return;
2100 // __kmpc_taskgroup(ident_t *, gtid);
2101 // TaskgroupOpGen();
2102 // __kmpc_end_taskgroup(ident_t *, gtid);
2103 // Prepare arguments and build a call to __kmpc_taskgroup
2104 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2105 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2106 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2107 Args,
2108 OMPBuilder.getOrCreateRuntimeFunction(
2109 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2110 Args);
2111 TaskgroupOpGen.setAction(Action);
2112 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2113}
2114
2115/// Given an array of pointers to variables, project the address of a
2116/// given variable.
2117static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2118 unsigned Index, const VarDecl *Var) {
2119 // Pull out the pointer to the variable.
2120 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2121 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2122
2123 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2124 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2125}
2126
2127static llvm::Value *emitCopyprivateCopyFunction(
2128 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2129 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2130 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2131 SourceLocation Loc) {
2132 ASTContext &C = CGM.getContext();
2133 // void copy_func(void *LHSArg, void *RHSArg);
2134 FunctionArgList Args;
2135 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2136                           ImplicitParamKind::Other);
2137 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2138                           ImplicitParamKind::Other);
2139 Args.push_back(&LHSArg);
2140 Args.push_back(&RHSArg);
2141 const auto &CGFI =
2142 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2143 std::string Name =
2144 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2145 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2146 llvm::GlobalValue::InternalLinkage, Name,
2147 &CGM.getModule());
2148  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2149 Fn->setDoesNotRecurse();
2150 CodeGenFunction CGF(CGM);
2151 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2152 // Dest = (void*[n])(LHSArg);
2153 // Src = (void*[n])(RHSArg);
2154  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2155 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2156 CGF.Builder.getPtrTy(0)),
2157 ArgsElemType, CGF.getPointerAlign());
2158  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2159 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2160 CGF.Builder.getPtrTy(0)),
2161 ArgsElemType, CGF.getPointerAlign());
2162 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2163 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2164 // ...
2165 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2166 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2167 const auto *DestVar =
2168 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2169 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2170
2171 const auto *SrcVar =
2172 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2173 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2174
2175 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2176 QualType Type = VD->getType();
2177 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2178 }
2179 CGF.FinishFunction();
2180 return Fn;
2181}
2182
2183void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2184 const RegionCodeGenTy &SingleOpGen,
2185 SourceLocation Loc,
2186 ArrayRef<const Expr *> CopyprivateVars,
2187 ArrayRef<const Expr *> SrcExprs,
2188 ArrayRef<const Expr *> DstExprs,
2189 ArrayRef<const Expr *> AssignmentOps) {
2190 if (!CGF.HaveInsertPoint())
2191 return;
2192 assert(CopyprivateVars.size() == SrcExprs.size() &&
2193 CopyprivateVars.size() == DstExprs.size() &&
2194 CopyprivateVars.size() == AssignmentOps.size());
2195 ASTContext &C = CGM.getContext();
2196 // int32 did_it = 0;
2197 // if(__kmpc_single(ident_t *, gtid)) {
2198 // SingleOpGen();
2199 // __kmpc_end_single(ident_t *, gtid);
2200 // did_it = 1;
2201 // }
2202 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2203 // <copy_func>, did_it);
2204
2205 Address DidIt = Address::invalid();
2206 if (!CopyprivateVars.empty()) {
2207 // int32 did_it = 0;
2208 QualType KmpInt32Ty =
2209 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2210 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2211 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2212 }
2213 // Prepare arguments and build a call to __kmpc_single
2214 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2215 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2216 CGM.getModule(), OMPRTL___kmpc_single),
2217 Args,
2218 OMPBuilder.getOrCreateRuntimeFunction(
2219 CGM.getModule(), OMPRTL___kmpc_end_single),
2220 Args,
2221 /*Conditional=*/true);
2222 SingleOpGen.setAction(Action);
2223 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2224 if (DidIt.isValid()) {
2225 // did_it = 1;
2226 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2227 }
2228 Action.Done(CGF);
2229 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2230 // <copy_func>, did_it);
2231 if (DidIt.isValid()) {
2232 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2233 QualType CopyprivateArrayTy = C.getConstantArrayType(
2234 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2235 /*IndexTypeQuals=*/0);
2236 // Create a list of all private variables for copyprivate.
2237 Address CopyprivateList =
2238 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2239 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2240 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2241 CGF.Builder.CreateStore(
2242          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2243 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2244 CGF.VoidPtrTy),
2245 Elem);
2246 }
2247 // Build function that copies private values from single region to all other
2248 // threads in the corresponding parallel region.
2249 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2250 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2251 SrcExprs, DstExprs, AssignmentOps, Loc);
2252 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2253    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2254 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2255 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2256 llvm::Value *Args[] = {
2257 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2258 getThreadID(CGF, Loc), // i32 <gtid>
2259 BufSize, // size_t <buf_size>
2260 CL.emitRawPointer(CGF), // void *<copyprivate list>
2261 CpyFn, // void (*) (void *, void *) <copy_func>
2262 DidItVal // i32 did_it
2263 };
2264 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2265 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2266 Args);
2267 }
2268}
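// Illustrative flow for '#pragma omp single copyprivate(x)': the thread
// that wins __kmpc_single runs the body and sets did_it to 1; afterwards
// every thread calls __kmpc_copyprivate with the generated copy function,
// which broadcasts the winner's value of x to the rest of the team.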
2269
2270void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2271 const RegionCodeGenTy &OrderedOpGen,
2272 SourceLocation Loc, bool IsThreads) {
2273 if (!CGF.HaveInsertPoint())
2274 return;
2275 // __kmpc_ordered(ident_t *, gtid);
2276 // OrderedOpGen();
2277 // __kmpc_end_ordered(ident_t *, gtid);
2278 // Prepare arguments and build a call to __kmpc_ordered
2279 if (IsThreads) {
2280 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2281 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2282 CGM.getModule(), OMPRTL___kmpc_ordered),
2283 Args,
2284 OMPBuilder.getOrCreateRuntimeFunction(
2285 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2286 Args);
2287 OrderedOpGen.setAction(Action);
2288 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2289 return;
2290 }
2291 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2292}
2293
2294static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2295 unsigned Flags;
2296 if (Kind == OMPD_for)
2297 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2298 else if (Kind == OMPD_sections)
2299 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2300 else if (Kind == OMPD_single)
2301 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2302 else if (Kind == OMPD_barrier)
2303 Flags = OMP_IDENT_BARRIER_EXPL;
2304 else
2305 Flags = OMP_IDENT_BARRIER_IMPL;
2306 return Flags;
2307}
2308
2309void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2310 CodeGenFunction &CGF, const OMPLoopDirective &S,
2311 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2312  // Check if the loop directive is actually a doacross loop directive. In
2313  // this case choose schedule(static, 1).
2314 if (llvm::any_of(
2315 S.getClausesOfKind<OMPOrderedClause>(),
2316 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2317 ScheduleKind = OMPC_SCHEDULE_static;
2318 // Chunk size is 1 in this case.
2319 llvm::APInt ChunkSize(32, 1);
2320 ChunkExpr = IntegerLiteral::Create(
2321 CGF.getContext(), ChunkSize,
2322 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2323 SourceLocation());
2324 }
2325}
2326
2327void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2328 OpenMPDirectiveKind Kind, bool EmitChecks,
2329 bool ForceSimpleCall) {
2330 // Check if we should use the OMPBuilder
2331 auto *OMPRegionInfo =
2332 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2333 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2334 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2335 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2336 EmitChecks));
2337 CGF.Builder.restoreIP(AfterIP);
2338 return;
2339 }
2340
2341 if (!CGF.HaveInsertPoint())
2342 return;
2345 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2346 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2347 // thread_id);
2348 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2349 getThreadID(CGF, Loc)};
2350 if (OMPRegionInfo) {
2351 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2352 llvm::Value *Result = CGF.EmitRuntimeCall(
2353 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2354 OMPRTL___kmpc_cancel_barrier),
2355 Args);
2356 if (EmitChecks) {
2357 // if (__kmpc_cancel_barrier()) {
2358 // exit from construct;
2359 // }
2360 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2361 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2362 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2363 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2364 CGF.EmitBlock(ExitBB);
2365 // exit from construct;
2366 CodeGenFunction::JumpDest CancelDestination =
2367 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2368 CGF.EmitBranchThroughCleanup(CancelDestination);
2369 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2370 }
2371 return;
2372 }
2373 }
2374 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2375 CGM.getModule(), OMPRTL___kmpc_barrier),
2376 Args);
2377}
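// In a cancellable region the emitted check looks roughly like:
//   %res = call i32 @__kmpc_cancel_barrier(ptr @loc, i32 %gtid)
//   br i1 <%res != 0>, label %.cancel.exit, label %.cancel.continue
// otherwise a plain __kmpc_barrier call is emitted.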
2378
2379void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2380 Expr *ME, bool IsFatal) {
2381 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2382 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2383 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2384 // *message)
2385 llvm::Value *Args[] = {
2386 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2387 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2388 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2389 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2390 CGM.getModule(), OMPRTL___kmpc_error),
2391 Args);
2392}
2393
2394/// Map the OpenMP loop schedule to the runtime enumeration.
2395static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2396 bool Chunked, bool Ordered) {
2397 switch (ScheduleKind) {
2398 case OMPC_SCHEDULE_static:
2399 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2400 : (Ordered ? OMP_ord_static : OMP_sch_static);
2401 case OMPC_SCHEDULE_dynamic:
2402 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2403 case OMPC_SCHEDULE_guided:
2404 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2405 case OMPC_SCHEDULE_runtime:
2406 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2407 case OMPC_SCHEDULE_auto:
2408 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2409  case OMPC_SCHEDULE_unknown:
2410 assert(!Chunked && "chunk was specified but schedule kind not known");
2411 return Ordered ? OMP_ord_static : OMP_sch_static;
2412 }
2413 llvm_unreachable("Unexpected runtime schedule");
2414}
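// For example, 'schedule(dynamic, 4)' maps to OMP_sch_dynamic_chunked
// (OMP_ord_dynamic_chunked when the loop is ordered), while a bare
// 'schedule(static)' maps to OMP_sch_static.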
2415
2416/// Map the OpenMP distribute schedule to the runtime enumeration.
2417static OpenMPSchedType
2418getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2419 // only static is allowed for dist_schedule
2420 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2421}
2422
2423bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2424 bool Chunked) const {
2425 OpenMPSchedType Schedule =
2426 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2427 return Schedule == OMP_sch_static;
2428}
2429
2430bool CGOpenMPRuntime::isStaticNonchunked(
2431 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2432 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2433 return Schedule == OMP_dist_sch_static;
2434}
2435
2436bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2437 bool Chunked) const {
2438 OpenMPSchedType Schedule =
2439 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2440 return Schedule == OMP_sch_static_chunked;
2441}
2442
2443bool CGOpenMPRuntime::isStaticChunked(
2444 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2445 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2446 return Schedule == OMP_dist_sch_static_chunked;
2447}
2448
2449bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2450 OpenMPSchedType Schedule =
2451 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2452 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2453 return Schedule != OMP_sch_static;
2454}
2455
2456static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2457                                  OpenMPScheduleClauseModifier M1,
2458                                  OpenMPScheduleClauseModifier M2) {
2459 int Modifier = 0;
2460 switch (M1) {
2461 case OMPC_SCHEDULE_MODIFIER_monotonic:
2462 Modifier = OMP_sch_modifier_monotonic;
2463 break;
2464 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2465 Modifier = OMP_sch_modifier_nonmonotonic;
2466 break;
2467 case OMPC_SCHEDULE_MODIFIER_simd:
2468 if (Schedule == OMP_sch_static_chunked)
2469 Schedule = OMP_sch_static_balanced_chunked;
2470 break;
2471  case OMPC_SCHEDULE_MODIFIER_last:
2472  case OMPC_SCHEDULE_MODIFIER_unknown:
2473 break;
2474 }
2475 switch (M2) {
2476 case OMPC_SCHEDULE_MODIFIER_monotonic:
2477 Modifier = OMP_sch_modifier_monotonic;
2478 break;
2479 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2480 Modifier = OMP_sch_modifier_nonmonotonic;
2481 break;
2482 case OMPC_SCHEDULE_MODIFIER_simd:
2483 if (Schedule == OMP_sch_static_chunked)
2484 Schedule = OMP_sch_static_balanced_chunked;
2485 break;
2486  case OMPC_SCHEDULE_MODIFIER_last:
2487  case OMPC_SCHEDULE_MODIFIER_unknown:
2488 break;
2489 }
2490  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2491 // If the static schedule kind is specified or if the ordered clause is
2492 // specified, and if the nonmonotonic modifier is not specified, the effect is
2493 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2494 // modifier is specified, the effect is as if the nonmonotonic modifier is
2495 // specified.
2496 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2497 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2498 Schedule == OMP_sch_static_balanced_chunked ||
2499 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2500 Schedule == OMP_dist_sch_static_chunked ||
2501 Schedule == OMP_dist_sch_static))
2502 Modifier = OMP_sch_modifier_nonmonotonic;
2503 }
2504 return Schedule | Modifier;
2505}
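// For example, under OpenMP >= 5.0 a plain 'schedule(dynamic)' is encoded
// as OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, while the
// static and ordered schedules listed above keep Modifier == 0, i.e. the
// monotonic default.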
2506
2507void CGOpenMPRuntime::emitForDispatchInit(
2508    CodeGenFunction &CGF, SourceLocation Loc,
2509 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2510 bool Ordered, const DispatchRTInput &DispatchValues) {
2511 if (!CGF.HaveInsertPoint())
2512 return;
2513 OpenMPSchedType Schedule = getRuntimeSchedule(
2514 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2515 assert(Ordered ||
2516 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2517 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2518 Schedule != OMP_sch_static_balanced_chunked));
2519 // Call __kmpc_dispatch_init(
2520 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2521 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2522 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2523
2524 // If the Chunk was not specified in the clause - use default value 1.
2525 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2526 : CGF.Builder.getIntN(IVSize, 1);
2527 llvm::Value *Args[] = {
2528 emitUpdateLocation(CGF, Loc),
2529 getThreadID(CGF, Loc),
2530 CGF.Builder.getInt32(addMonoNonMonoModifier(
2531 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2532 DispatchValues.LB, // Lower
2533 DispatchValues.UB, // Upper
2534 CGF.Builder.getIntN(IVSize, 1), // Stride
2535 Chunk // Chunk
2536 };
2537 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2538 Args);
2539}
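// The driver loop emitted around this (see emitForNext below) then polls
// the runtime; roughly, for a 32-bit signed IV:
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, 1, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &st))
//     ; // execute iterations [lo, hi]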
2540
2541void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2542 SourceLocation Loc) {
2543 if (!CGF.HaveInsertPoint())
2544 return;
2545 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2546 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2547 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2548}
2549
2550static void emitForStaticInitCall(
2551 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2552 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2553    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2554 const CGOpenMPRuntime::StaticRTInput &Values) {
2555 if (!CGF.HaveInsertPoint())
2556 return;
2557
2558 assert(!Values.Ordered);
2559 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2560 Schedule == OMP_sch_static_balanced_chunked ||
2561 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2562 Schedule == OMP_dist_sch_static ||
2563 Schedule == OMP_dist_sch_static_chunked);
2564
2565 // Call __kmpc_for_static_init(
2566 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2567 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2568 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2569 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2570 llvm::Value *Chunk = Values.Chunk;
2571 if (Chunk == nullptr) {
2572 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2573 Schedule == OMP_dist_sch_static) &&
2574 "expected static non-chunked schedule");
2575 // If the Chunk was not specified in the clause - use default value 1.
2576 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2577 } else {
2578 assert((Schedule == OMP_sch_static_chunked ||
2579 Schedule == OMP_sch_static_balanced_chunked ||
2580 Schedule == OMP_ord_static_chunked ||
2581 Schedule == OMP_dist_sch_static_chunked) &&
2582 "expected static chunked schedule");
2583 }
2584 llvm::Value *Args[] = {
2585 UpdateLocation,
2586 ThreadId,
2587 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2588 M2)), // Schedule type
2589 Values.IL.emitRawPointer(CGF), // &isLastIter
2590 Values.LB.emitRawPointer(CGF), // &LB
2591 Values.UB.emitRawPointer(CGF), // &UB
2592 Values.ST.emitRawPointer(CGF), // &Stride
2593 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2594 Chunk // Chunk
2595 };
2596 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2597}
2598
2599void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2600 SourceLocation Loc,
2601 OpenMPDirectiveKind DKind,
2602 const OpenMPScheduleTy &ScheduleKind,
2603 const StaticRTInput &Values) {
2604 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2605 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2606 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2607 "Expected loop-based or sections-based directive.");
2608 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2609                                                    isOpenMPLoopDirective(DKind)
2610 ? OMP_IDENT_WORK_LOOP
2611 : OMP_IDENT_WORK_SECTIONS);
2612 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2613 llvm::FunctionCallee StaticInitFunction =
2614 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2615 false);
2616  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2617 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2618 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2619}
2620
2621void CGOpenMPRuntime::emitDistributeStaticInit(
2622    CodeGenFunction &CGF, SourceLocation Loc,
2623    OpenMPDistScheduleClauseKind SchedKind,
2624 const CGOpenMPRuntime::StaticRTInput &Values) {
2625 OpenMPSchedType ScheduleNum =
2626 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2627 llvm::Value *UpdatedLocation =
2628 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2629 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2630 llvm::FunctionCallee StaticInitFunction;
2631 bool isGPUDistribute =
2632 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2633 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2634 Values.IVSize, Values.IVSigned, isGPUDistribute);
2635
2636 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2637 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2638                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
2639}
2640
2641void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2642 SourceLocation Loc,
2643 OpenMPDirectiveKind DKind) {
2644 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2645 DKind == OMPD_sections) &&
2646 "Expected distribute, for, or sections directive kind");
2647 if (!CGF.HaveInsertPoint())
2648 return;
2649 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2650 llvm::Value *Args[] = {
2651 emitUpdateLocation(CGF, Loc,
2652                         isOpenMPDistributeDirective(DKind) ||
2653 (DKind == OMPD_target_teams_loop)
2654 ? OMP_IDENT_WORK_DISTRIBUTE
2655 : isOpenMPLoopDirective(DKind)
2656 ? OMP_IDENT_WORK_LOOP
2657 : OMP_IDENT_WORK_SECTIONS),
2658 getThreadID(CGF, Loc)};
2659  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2660 if (isOpenMPDistributeDirective(DKind) &&
2661 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2662 CGF.EmitRuntimeCall(
2663 OMPBuilder.getOrCreateRuntimeFunction(
2664 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2665 Args);
2666 else
2667 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2668 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2669 Args);
2670}
2671
2672void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2673 SourceLocation Loc,
2674 unsigned IVSize,
2675 bool IVSigned) {
2676 if (!CGF.HaveInsertPoint())
2677 return;
2678 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2679 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2680 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2681 Args);
2682}
2683
2684llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2685 SourceLocation Loc, unsigned IVSize,
2686 bool IVSigned, Address IL,
2687 Address LB, Address UB,
2688 Address ST) {
2689 // Call __kmpc_dispatch_next(
2690 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2691 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2692 // kmp_int[32|64] *p_stride);
2693 llvm::Value *Args[] = {
2694 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2695 IL.emitRawPointer(CGF), // &isLastIter
2696 LB.emitRawPointer(CGF), // &Lower
2697 UB.emitRawPointer(CGF), // &Upper
2698 ST.emitRawPointer(CGF) // &Stride
2699 };
2700 llvm::Value *Call = CGF.EmitRuntimeCall(
2701 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2702 return CGF.EmitScalarConversion(
2703 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2704 CGF.getContext().BoolTy, Loc);
2705}
2706
2707llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2708 const Expr *Message,
2709 SourceLocation Loc) {
2710 if (!Message)
2711 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2712 return CGF.EmitScalarExpr(Message);
2713}
2714
2715llvm::Value *
2716CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity,
2717 SourceLocation Loc) {
2718 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2719 // as if sev-level is fatal."
2720 return llvm::ConstantInt::get(CGM.Int32Ty,
2721 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2722}
2723
2724void CGOpenMPRuntime::emitNumThreadsClause(
2725 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2726    OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
2727 SourceLocation SeverityLoc, const Expr *Message,
2728 SourceLocation MessageLoc) {
2729 if (!CGF.HaveInsertPoint())
2730 return;
2731  llvm::SmallVector<llvm::Value *, 4> Args(
2732 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2733 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2734 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2735 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2736  // message) if the strict modifier is used.
2737 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2738 if (Modifier == OMPC_NUMTHREADS_strict) {
2739 FnID = OMPRTL___kmpc_push_num_threads_strict;
2740 Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2741 Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
2742 }
2743 CGF.EmitRuntimeCall(
2744 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2745}
2746
2747void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2748 ProcBindKind ProcBind,
2749 SourceLocation Loc) {
2750 if (!CGF.HaveInsertPoint())
2751 return;
2752 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2753 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2754 llvm::Value *Args[] = {
2755 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2756 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2757 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2758 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2759 Args);
2760}
2761
2762void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2763 SourceLocation Loc, llvm::AtomicOrdering AO) {
2764 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2765 OMPBuilder.createFlush(CGF.Builder);
2766 } else {
2767 if (!CGF.HaveInsertPoint())
2768 return;
2769 // Build call void __kmpc_flush(ident_t *loc)
2770 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2771 CGM.getModule(), OMPRTL___kmpc_flush),
2772 emitUpdateLocation(CGF, Loc));
2773 }
2774}
2775
2776namespace {
2777/// Indexes of fields for type kmp_task_t.
2778enum KmpTaskTFields {
2779 /// List of shared variables.
2780 KmpTaskTShareds,
2781 /// Task routine.
2782 KmpTaskTRoutine,
2783 /// Partition id for the untied tasks.
2784 KmpTaskTPartId,
2785 /// Function with call of destructors for private variables.
2786 Data1,
2787 /// Task priority.
2788 Data2,
2789 /// (Taskloops only) Lower bound.
2790 KmpTaskTLowerBound,
2791 /// (Taskloops only) Upper bound.
2792 KmpTaskTUpperBound,
2793 /// (Taskloops only) Stride.
2794 KmpTaskTStride,
2795 /// (Taskloops only) Is last iteration flag.
2796 KmpTaskTLastIter,
2797 /// (Taskloops only) Reduction data.
2798 KmpTaskTReductions,
2799};
2800} // anonymous namespace
2801
2802void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2803 // If we are in simd mode or there are no entries, we don't need to do
2804 // anything.
2805 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2806 return;
2807
2808 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2809 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2810 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2811 SourceLocation Loc;
2812 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2813 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2814 E = CGM.getContext().getSourceManager().fileinfo_end();
2815 I != E; ++I) {
2816 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2817 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2818 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2819 I->getFirst(), EntryInfo.Line, 1);
2820 break;
2821 }
2822 }
2823 }
2824 switch (Kind) {
2825 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2826 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2827 DiagnosticsEngine::Error, "Offloading entry for target region in "
2828 "%0 is incorrect: either the "
2829 "address or the ID is invalid.");
2830 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2831 } break;
2832 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2833 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2834 DiagnosticsEngine::Error, "Offloading entry for declare target "
2835 "variable %0 is incorrect: the "
2836 "address is invalid.");
2837 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2838 } break;
2839 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2840 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2841        DiagnosticsEngine::Error,
2842 "Offloading entry for declare target variable is incorrect: the "
2843 "address is invalid.");
2844 CGM.getDiags().Report(DiagID);
2845 } break;
2846 }
2847 };
2848
2849 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2850}
2851
2852void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2853 if (!KmpRoutineEntryPtrTy) {
2854 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2855 ASTContext &C = CGM.getContext();
2856 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2857    FunctionProtoType::ExtProtoInfo EPI;
2858 KmpRoutineEntryPtrQTy = C.getPointerType(
2859 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2860 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2861 }
2862}
2863
2864namespace {
2865struct PrivateHelpersTy {
2866 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2867 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2868 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2869 PrivateElemInit(PrivateElemInit) {}
2870 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2871 const Expr *OriginalRef = nullptr;
2872 const VarDecl *Original = nullptr;
2873 const VarDecl *PrivateCopy = nullptr;
2874 const VarDecl *PrivateElemInit = nullptr;
2875 bool isLocalPrivate() const {
2876 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2877 }
2878};
2879typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2880} // anonymous namespace
2881
2882static bool isAllocatableDecl(const VarDecl *VD) {
2883 const VarDecl *CVD = VD->getCanonicalDecl();
2884 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2885 return false;
2886 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2887 // Use the default allocation.
2888 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2889 !AA->getAllocator());
2890}
2891
2892static RecordDecl *
2893createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2894 if (!Privates.empty()) {
2895 ASTContext &C = CGM.getContext();
2896 // Build struct .kmp_privates_t. {
2897 // /* private vars */
2898 // };
2899 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2900 RD->startDefinition();
2901 for (const auto &Pair : Privates) {
2902 const VarDecl *VD = Pair.second.Original;
2903      QualType Type = VD->getType().getNonReferenceType();
2904 // If the private variable is a local variable with lvalue ref type,
2905 // allocate the pointer instead of the pointee type.
2906 if (Pair.second.isLocalPrivate()) {
2907 if (VD->getType()->isLValueReferenceType())
2908 Type = C.getPointerType(Type);
2909 if (isAllocatableDecl(VD))
2910 Type = C.getPointerType(Type);
2911 }
2912      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2913 if (VD->hasAttrs()) {
2914 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2915 E(VD->getAttrs().end());
2916 I != E; ++I)
2917 FD->addAttr(*I);
2918 }
2919 }
2920 RD->completeDefinition();
2921 return RD;
2922 }
2923 return nullptr;
2924}
2925
2926static RecordDecl *
2927createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2928 QualType KmpInt32Ty,
2929 QualType KmpRoutineEntryPointerQTy) {
2930 ASTContext &C = CGM.getContext();
2931 // Build struct kmp_task_t {
2932 // void * shareds;
2933 // kmp_routine_entry_t routine;
2934 // kmp_int32 part_id;
2935 // kmp_cmplrdata_t data1;
2936 // kmp_cmplrdata_t data2;
2937 // For taskloops additional fields:
2938 // kmp_uint64 lb;
2939 // kmp_uint64 ub;
2940 // kmp_int64 st;
2941 // kmp_int32 liter;
2942 // void * reductions;
2943 // };
2944 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2945 UD->startDefinition();
2946 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2947 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2948 UD->completeDefinition();
2949 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
2950 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2951 RD->startDefinition();
2952 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2953 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2954 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2955 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2956 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2957 if (isOpenMPTaskLoopDirective(Kind)) {
2958 QualType KmpUInt64Ty =
2959 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2960 QualType KmpInt64Ty =
2961 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2962 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2963 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2964 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2965 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2966 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2967 }
2968 RD->completeDefinition();
2969 return RD;
2970}
2971
2972static RecordDecl *
2973createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2974 ArrayRef<PrivateDataTy> Privates) {
2975 ASTContext &C = CGM.getContext();
2976 // Build struct kmp_task_t_with_privates {
2977 // kmp_task_t task_data;
2978 // .kmp_privates_t. privates;
2979 // };
2980 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2981 RD->startDefinition();
2982 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2983 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2984 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
2985 RD->completeDefinition();
2986 return RD;
2987}
2988
2989/// Emit a proxy function which accepts kmp_task_t as the second
2990/// argument.
2991/// \code
2992/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2993/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2994/// For taskloops:
2995/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2996/// tt->reductions, tt->shareds);
2997/// return 0;
2998/// }
2999/// \endcode
3000static llvm::Function *
3001emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3002 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3003 QualType KmpTaskTWithPrivatesPtrQTy,
3004 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3005 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3006 llvm::Value *TaskPrivatesMap) {
3007 ASTContext &C = CGM.getContext();
3008 FunctionArgList Args;
3009 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3010                            ImplicitParamKind::Other);
3011 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3012 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3013                                ImplicitParamKind::Other);
3014 Args.push_back(&GtidArg);
3015 Args.push_back(&TaskTypeArg);
3016 const auto &TaskEntryFnInfo =
3017 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3018 llvm::FunctionType *TaskEntryTy =
3019 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3020 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3021 auto *TaskEntry = llvm::Function::Create(
3022 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3023 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3024 TaskEntry->setDoesNotRecurse();
3025 CodeGenFunction CGF(CGM);
3026 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3027 Loc, Loc);
3028
3029 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3030 // tt,
3031 // For taskloops:
3032 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3033 // tt->task_data.shareds);
3034 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3035 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3036 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3037 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3038 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3039 const auto *KmpTaskTWithPrivatesQTyRD =
3040 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3041 LValue Base =
3042 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3043 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3044 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3045 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3046 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3047
3048 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3049 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3050 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3051 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3052 CGF.ConvertTypeForMem(SharedsPtrTy));
3053
3054 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3055 llvm::Value *PrivatesParam;
3056 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3057 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3058 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3059 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3060 } else {
3061 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3062 }
3063
3064 llvm::Value *CommonArgs[] = {
3065 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3066 CGF.Builder
3067 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3068 CGF.VoidPtrTy, CGF.Int8Ty)
3069 .emitRawPointer(CGF)};
3070 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3071 std::end(CommonArgs));
3072 if (isOpenMPTaskLoopDirective(Kind)) {
3073 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3074 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3075 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3076 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3077 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3078 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3079 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3080 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3081 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3082 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3083 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3084 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3085 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3086 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3087 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3088 CallArgs.push_back(LBParam);
3089 CallArgs.push_back(UBParam);
3090 CallArgs.push_back(StParam);
3091 CallArgs.push_back(LIParam);
3092 CallArgs.push_back(RParam);
3093 }
3094 CallArgs.push_back(SharedsParam);
3095
3096 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3097 CallArgs);
3098 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3099 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3100 CGF.FinishFunction();
3101 return TaskEntry;
3102}
3103
3104static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3105 SourceLocation Loc,
3106 QualType KmpInt32Ty,
3107 QualType KmpTaskTWithPrivatesPtrQTy,
3108 QualType KmpTaskTWithPrivatesQTy) {
3109 ASTContext &C = CGM.getContext();
3110 FunctionArgList Args;
3111 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3112 ImplicitParamKind::Other);
3113 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3114 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3115 ImplicitParamKind::Other);
3116 Args.push_back(&GtidArg);
3117 Args.push_back(&TaskTypeArg);
3118 const auto &DestructorFnInfo =
3119 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3120 llvm::FunctionType *DestructorFnTy =
3121 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3122 std::string Name =
3123 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3124 auto *DestructorFn =
3125 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3126 Name, &CGM.getModule());
3127 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3128 DestructorFnInfo);
3129 DestructorFn->setDoesNotRecurse();
3130 CodeGenFunction CGF(CGM);
3131 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3132 Args, Loc, Loc);
3133
3134 LValue Base = CGF.EmitLoadOfPointerLValue(
3135 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3136 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3137 const auto *KmpTaskTWithPrivatesQTyRD =
3138 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3139 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3140 Base = CGF.EmitLValueForField(Base, *FI);
3141 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3142 if (QualType::DestructionKind DtorKind =
3143 Field->getType().isDestructedType()) {
3144 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3145 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3146 }
3147 }
3148 CGF.FinishFunction();
3149 return DestructorFn;
3150}
3151
3152/// Emit a privates mapping function for correct handling of private and
3153/// firstprivate variables.
3154/// \code
3155/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3156/// **noalias priv1,..., <tyn> **noalias privn) {
3157/// *priv1 = &.privates.priv1;
3158/// ...;
3159/// *privn = &.privates.privn;
3160/// }
3161/// \endcode
3162static llvm::Value *
3163emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3164 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3165 ArrayRef<PrivateDataTy> Privates) {
3166 ASTContext &C = CGM.getContext();
3167 FunctionArgList Args;
3168 ImplicitParamDecl TaskPrivatesArg(
3169 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3170 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3171 ImplicitParamKind::Other);
3172 Args.push_back(&TaskPrivatesArg);
3173 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3174 unsigned Counter = 1;
3175 for (const Expr *E : Data.PrivateVars) {
3176 Args.push_back(ImplicitParamDecl::Create(
3177 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3178 C.getPointerType(C.getPointerType(E->getType()))
3179 .withConst()
3180 .withRestrict(),
3181 ImplicitParamKind::Other));
3182 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3183 PrivateVarsPos[VD] = Counter;
3184 ++Counter;
3185 }
3186 for (const Expr *E : Data.FirstprivateVars) {
3187 Args.push_back(ImplicitParamDecl::Create(
3188 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3189 C.getPointerType(C.getPointerType(E->getType()))
3190 .withConst()
3191 .withRestrict(),
3192 ImplicitParamKind::Other));
3193 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3194 PrivateVarsPos[VD] = Counter;
3195 ++Counter;
3196 }
3197 for (const Expr *E : Data.LastprivateVars) {
3198 Args.push_back(ImplicitParamDecl::Create(
3199 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3200 C.getPointerType(C.getPointerType(E->getType()))
3201 .withConst()
3202 .withRestrict(),
3203 ImplicitParamKind::Other));
3204 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3205 PrivateVarsPos[VD] = Counter;
3206 ++Counter;
3207 }
3208 for (const VarDecl *VD : Data.PrivateLocals) {
3209 QualType Ty = VD->getType().getNonReferenceType();
3210 if (VD->getType()->isLValueReferenceType())
3211 Ty = C.getPointerType(Ty);
3212 if (isAllocatableDecl(VD))
3213 Ty = C.getPointerType(Ty);
3214 Args.push_back(ImplicitParamDecl::Create(
3215 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3216 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3217 ImplicitParamKind::Other));
3218 PrivateVarsPos[VD] = Counter;
3219 ++Counter;
3220 }
3221 const auto &TaskPrivatesMapFnInfo =
3222 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3223 llvm::FunctionType *TaskPrivatesMapTy =
3224 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3225 std::string Name =
3226 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3227 auto *TaskPrivatesMap = llvm::Function::Create(
3228 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3229 &CGM.getModule());
3230 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3231 TaskPrivatesMapFnInfo);
3232 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3233 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3234 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3235 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3236 }
3237 CodeGenFunction CGF(CGM);
3238 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3239 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3240
3241 // *privi = &.privates.privi;
3242 LValue Base = CGF.EmitLoadOfPointerLValue(
3243 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3244 TaskPrivatesArg.getType()->castAs<PointerType>());
3245 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3246 Counter = 0;
3247 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3248 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3249 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3250 LValue RefLVal =
3251 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3252 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3253 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3254 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3255 ++Counter;
3256 }
3257 CGF.FinishFunction();
3258 return TaskPrivatesMap;
3259}
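// Illustrative expansion (hypothetical privates 'double d' and 'int x'),
// following the \code sketch in the doc comment above:
// \code
// void .omp_task_privates_map.(const .privates. *noalias privs,
//                              double **noalias d, int **noalias x) {
//   *d = &privs->d;
//   *x = &privs->x;
// }
// \endcode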
3260
3261/// Emit initialization for private variables in task-based directives.
3262static void emitPrivatesInit(CodeGenFunction &CGF,
3263 const OMPExecutableDirective &D,
3264 Address KmpTaskSharedsPtr, LValue TDBase,
3265 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3266 QualType SharedsTy, QualType SharedsPtrTy,
3267 const OMPTaskDataTy &Data,
3268 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3269 ASTContext &C = CGF.getContext();
3270 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3271 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3272 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3273 ? OMPD_taskloop
3274 : OMPD_task;
3275 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3276 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3277 LValue SrcBase;
3278 bool IsTargetTask =
3279 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3280 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3281 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3282 // PointersArray, SizesArray, and MappersArray. The original variables for
3283 // these arrays are not captured and we get their addresses explicitly.
3284 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3285 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3286 SrcBase = CGF.MakeAddrLValue(
3287 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3288 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3289 CGF.ConvertTypeForMem(SharedsTy)),
3290 SharedsTy);
3291 }
3292 FI = FI->getType()->castAsRecordDecl()->field_begin();
3293 for (const PrivateDataTy &Pair : Privates) {
3294 // Do not initialize private locals.
3295 if (Pair.second.isLocalPrivate()) {
3296 ++FI;
3297 continue;
3298 }
3299 const VarDecl *VD = Pair.second.PrivateCopy;
3300 const Expr *Init = VD->getAnyInitializer();
3301 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3302 !CGF.isTrivialInitializer(Init)))) {
3303 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3304 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3305 const VarDecl *OriginalVD = Pair.second.Original;
3306 // Check if the variable is the target-based BasePointersArray,
3307 // PointersArray, SizesArray, or MappersArray.
3308 LValue SharedRefLValue;
3309 QualType Type = PrivateLValue.getType();
3310 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3311 if (IsTargetTask && !SharedField) {
3312 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3313 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3314 cast<CapturedDecl>(OriginalVD->getDeclContext())
3315 ->getNumParams() == 0 &&
3316 isa<TranslationUnitDecl>(
3317 cast<CapturedDecl>(OriginalVD->getDeclContext())
3318 ->getDeclContext()) &&
3319 "Expected artificial target data variable.");
3320 SharedRefLValue =
3321 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3322 } else if (ForDup) {
3323 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3324 SharedRefLValue = CGF.MakeAddrLValue(
3325 SharedRefLValue.getAddress().withAlignment(
3326 C.getDeclAlign(OriginalVD)),
3327 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3328 SharedRefLValue.getTBAAInfo());
3329 } else if (CGF.LambdaCaptureFields.count(
3330 Pair.second.Original->getCanonicalDecl()) > 0 ||
3331 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3332 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3333 } else {
3334 // Processing for implicitly captured variables.
3335 InlinedOpenMPRegionRAII Region(
3336 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3337 /*HasCancel=*/false, /*NoInheritance=*/true);
3338 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3339 }
3340 if (Type->isArrayType()) {
3341 // Initialize firstprivate array.
3342 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3343 // Perform simple memcpy.
3344 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3345 } else {
3346 // Initialize firstprivate array using element-by-element
3347 // initialization.
3348 CGF.EmitOMPAggregateAssign(
3349 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3350 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3351 Address SrcElement) {
3352 // Clean up any temporaries needed by the initialization.
3353 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3354 InitScope.addPrivate(Elem, SrcElement);
3355 (void)InitScope.Privatize();
3356 // Emit initialization for single element.
3357 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3358 CGF, &CapturesInfo);
3359 CGF.EmitAnyExprToMem(Init, DestElement,
3360 Init->getType().getQualifiers(),
3361 /*IsInitializer=*/false);
3362 });
3363 }
3364 } else {
3365 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3366 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3367 (void)InitScope.Privatize();
3368 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3369 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3370 /*capturedByInit=*/false);
3371 }
3372 } else {
3373 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3374 }
3375 }
3376 ++FI;
3377 }
3378}
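// Roughly, for hypothetical firstprivates 'double d' and 'int a[8]' the
// initialization emitted above amounts to
// \code
// privates->d = shareds->d;                           // scalar copy-init
// memcpy(privates->a, shareds->a, sizeof shareds->a); // trivial array init
// \endcode
// while arrays of non-trivially-constructible elements are initialized
// element by element via EmitOMPAggregateAssign.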
3379
3380/// Check if duplication function is required for taskloops.
3381static bool checkInitIsRequired(CodeGenFunction &CGF,
3382 ArrayRef<PrivateDataTy> Privates) {
3383 bool InitRequired = false;
3384 for (const PrivateDataTy &Pair : Privates) {
3385 if (Pair.second.isLocalPrivate())
3386 continue;
3387 const VarDecl *VD = Pair.second.PrivateCopy;
3388 const Expr *Init = VD->getAnyInitializer();
3389 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3390 !CGF.isTrivialInitializer(Init));
3391 if (InitRequired)
3392 break;
3393 }
3394 return InitRequired;
3395}
3396
3397
3398/// Emit task_dup function (for initialization of
3399/// private/firstprivate/lastprivate vars and last_iter flag)
3400/// \code
3401/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3402/// lastpriv) {
3403/// // setup lastprivate flag
3404/// task_dst->last = lastpriv;
3405/// // could be constructor calls here...
3406/// }
3407/// \endcode
3408static llvm::Value *
3409emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3410 const OMPExecutableDirective &D,
3411 QualType KmpTaskTWithPrivatesPtrQTy,
3412 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3413 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3414 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3415 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3416 ASTContext &C = CGM.getContext();
3417 FunctionArgList Args;
3418 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3419 KmpTaskTWithPrivatesPtrQTy,
3420 ImplicitParamKind::Other);
3421 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3422 KmpTaskTWithPrivatesPtrQTy,
3423 ImplicitParamKind::Other);
3424 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3425 ImplicitParamKind::Other);
3426 Args.push_back(&DstArg);
3427 Args.push_back(&SrcArg);
3428 Args.push_back(&LastprivArg);
3429 const auto &TaskDupFnInfo =
3430 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3431 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3432 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3433 auto *TaskDup = llvm::Function::Create(
3434 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3435 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3436 TaskDup->setDoesNotRecurse();
3437 CodeGenFunction CGF(CGM);
3438 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3439 Loc);
3440
3441 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3442 CGF.GetAddrOfLocalVar(&DstArg),
3443 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3444 // task_dst->liter = lastpriv;
3445 if (WithLastIter) {
3446 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3447 LValue Base = CGF.EmitLValueForField(
3448 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3449 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3450 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3451 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3452 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3453 }
3454
3455 // Emit initial values for private copies (if any).
3456 assert(!Privates.empty());
3457 Address KmpTaskSharedsPtr = Address::invalid();
3458 if (!Data.FirstprivateVars.empty()) {
3459 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3460 CGF.GetAddrOfLocalVar(&SrcArg),
3461 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3462 LValue Base = CGF.EmitLValueForField(
3463 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3464 KmpTaskSharedsPtr = Address(
3465 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3466 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3467 KmpTaskTShareds)),
3468 Loc),
3469 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3470 }
3471 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3472 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3473 CGF.FinishFunction();
3474 return TaskDup;
3475}
3476
3477/// Checks if destructor function is required to be generated.
3478/// \return true if cleanups are required, false otherwise.
3479static bool
3480checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3481 ArrayRef<PrivateDataTy> Privates) {
3482 for (const PrivateDataTy &P : Privates) {
3483 if (P.second.isLocalPrivate())
3484 continue;
3485 QualType Ty = P.second.Original->getType().getNonReferenceType();
3486 if (Ty.isDestructedType())
3487 return true;
3488 }
3489 return false;
3490}
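// Example: a task with 'firstprivate(s)' where 's' is a std::string has a
// destructible private copy, so checkDestructorsRequired() returns true;
// the task is then allocated with DestructorsFlag and a destructor thunk
// (see emitDestructorsFunction above) runs the copy's destructor.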
3491
3492namespace {
3493/// Loop generator for OpenMP iterator expression.
3494class OMPIteratorGeneratorScope final
3495 : public CodeGenFunction::OMPPrivateScope {
3496 CodeGenFunction &CGF;
3497 const OMPIteratorExpr *E = nullptr;
3498 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3499 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3500 OMPIteratorGeneratorScope() = delete;
3501 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3502
3503public:
3504 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3505 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3506 if (!E)
3507 return;
3508 SmallVector<llvm::Value *, 4> Uppers;
3509 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3510 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3511 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3512 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3513 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3514 addPrivate(
3515 HelperData.CounterVD,
3516 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3517 }
3518 Privatize();
3519
3520 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3521 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3522 LValue CLVal =
3523 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3524 HelperData.CounterVD->getType());
3525 // Counter = 0;
3526 CGF.EmitStoreOfScalar(
3527 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3528 CLVal);
3529 CodeGenFunction::JumpDest &ContDest =
3530 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3531 CodeGenFunction::JumpDest &ExitDest =
3532 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3533 // N = <number-of-iterations>;
3534 llvm::Value *N = Uppers[I];
3535 // cont:
3536 // if (Counter < N) goto body; else goto exit;
3537 CGF.EmitBlock(ContDest.getBlock());
3538 auto *CVal =
3539 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3540 llvm::Value *Cmp =
3541 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3542 ? CGF.Builder.CreateICmpSLT(CVal, N)
3543 : CGF.Builder.CreateICmpULT(CVal, N);
3544 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3545 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3546 // body:
3547 CGF.EmitBlock(BodyBB);
3548 // Iteri = Begini + Counter * Stepi;
3549 CGF.EmitIgnoredExpr(HelperData.Update);
3550 }
3551 }
3552 ~OMPIteratorGeneratorScope() {
3553 if (!E)
3554 return;
3555 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3556 // Counter = Counter + 1;
3557 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3558 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3559 // goto cont;
3560 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3561 // exit:
3562 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3563 }
3564 }
3565};
3566} // namespace
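// Illustration: for a modifier such as 'iterator(i=0:n)' (hypothetical),
// the scope above brackets its user with a loop nest that is conceptually
// \code
// counter = 0;
// cont:
//   if (!(counter < n)) goto exit;
//   i = begin + counter * step;   // HelperData.Update
//   <body emitted while the scope is alive>
//   counter = counter + 1;        // HelperData.CounterUpdate
//   goto cont;
// exit:;
// \endcode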
3567
3568static std::pair<llvm::Value *, llvm::Value *>
3569getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3570 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3571 llvm::Value *Addr;
3572 if (OASE) {
3573 const Expr *Base = OASE->getBase();
3574 Addr = CGF.EmitScalarExpr(Base);
3575 } else {
3576 Addr = CGF.EmitLValue(E).getPointer(CGF);
3577 }
3578 llvm::Value *SizeVal;
3579 QualType Ty = E->getType();
3580 if (OASE) {
3581 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3582 for (const Expr *SE : OASE->getDimensions()) {
3583 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3584 Sz = CGF.EmitScalarConversion(
3585 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3586 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3587 }
3588 } else if (const auto *ASE =
3589 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3590 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3591 Address UpAddrAddress = UpAddrLVal.getAddress();
3592 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3593 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3594 /*Idx0=*/1);
3595 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3596 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3597 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3598 } else {
3599 SizeVal = CGF.getTypeSize(Ty);
3600 }
3601 return std::make_pair(Addr, SizeVal);
3602}
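// The size computation above, in C-like terms (hypothetical 'p' and 'a'):
// \code
// ([n][m])p  -> size = n * m * sizeof(*p);                   // array shaping
// a[lb:len]  -> size = (char *)&a[lb + len] - (char *)&a[lb]; // array section
// plain expr -> size = sizeof(expr);
// \endcode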
3603
3604/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
3605static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3606 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3607 if (KmpTaskAffinityInfoTy.isNull()) {
3608 RecordDecl *KmpAffinityInfoRD =
3609 C.buildImplicitRecord("kmp_task_affinity_info_t");
3610 KmpAffinityInfoRD->startDefinition();
3611 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3612 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3613 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3614 KmpAffinityInfoRD->completeDefinition();
3615 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3616 }
3617}
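// The record built above mirrors the runtime's affinity descriptor; in C
// terms it is roughly (field names follow RTLAffinityInfoFieldsTy below):
// \code
// struct kmp_task_affinity_info_t {
//   intptr_t base_addr;
//   size_t   len;
//   uint32_t flags;
// };
// \endcode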
3618
3619CGOpenMPRuntime::TaskResultTy
3620CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3621 const OMPExecutableDirective &D,
3622 llvm::Function *TaskFunction, QualType SharedsTy,
3623 Address Shareds, const OMPTaskDataTy &Data) {
3624 ASTContext &C = CGM.getContext();
3625 SmallVector<PrivateDataTy, 4> Privates;
3626 // Aggregate privates and sort them by alignment.
3627 const auto *I = Data.PrivateCopies.begin();
3628 for (const Expr *E : Data.PrivateVars) {
3629 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3630 Privates.emplace_back(
3631 C.getDeclAlign(VD),
3632 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3633 /*PrivateElemInit=*/nullptr));
3634 ++I;
3635 }
3636 I = Data.FirstprivateCopies.begin();
3637 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3638 for (const Expr *E : Data.FirstprivateVars) {
3639 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3640 Privates.emplace_back(
3641 C.getDeclAlign(VD),
3642 PrivateHelpersTy(
3643 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3644 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3645 ++I;
3646 ++IElemInitRef;
3647 }
3648 I = Data.LastprivateCopies.begin();
3649 for (const Expr *E : Data.LastprivateVars) {
3650 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3651 Privates.emplace_back(
3652 C.getDeclAlign(VD),
3653 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3654 /*PrivateElemInit=*/nullptr));
3655 ++I;
3656 }
3657 for (const VarDecl *VD : Data.PrivateLocals) {
3658 if (isAllocatableDecl(VD))
3659 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3660 else
3661 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3662 }
3663 llvm::stable_sort(Privates,
3664 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3665 return L.first > R.first;
3666 });
3667 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3668 // Build type kmp_routine_entry_t (if not built yet).
3669 emitKmpRoutineEntryT(KmpInt32Ty);
3670 // Build type kmp_task_t (if not built yet).
3671 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3672 if (SavedKmpTaskloopTQTy.isNull()) {
3673 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3674 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3675 }
3676 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3677 } else {
3678 assert((D.getDirectiveKind() == OMPD_task ||
3679 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3680 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3681 "Expected taskloop, task or target directive");
3682 if (SavedKmpTaskTQTy.isNull()) {
3683 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3684 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3685 }
3686 KmpTaskTQTy = SavedKmpTaskTQTy;
3687 }
3688 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3689 // Build particular struct kmp_task_t for the given task.
3690 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3691 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3692 CanQualType KmpTaskTWithPrivatesQTy =
3693 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3694 QualType KmpTaskTWithPrivatesPtrQTy =
3695 C.getPointerType(KmpTaskTWithPrivatesQTy);
3696 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3697 llvm::Value *KmpTaskTWithPrivatesTySize =
3698 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3699 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3700
3701 // Emit initial values for private copies (if any).
3702 llvm::Value *TaskPrivatesMap = nullptr;
3703 llvm::Type *TaskPrivatesMapTy =
3704 std::next(TaskFunction->arg_begin(), 3)->getType();
3705 if (!Privates.empty()) {
3706 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3707 TaskPrivatesMap =
3708 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3709 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3710 TaskPrivatesMap, TaskPrivatesMapTy);
3711 } else {
3712 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3713 cast<llvm::PointerType>(TaskPrivatesMapTy));
3714 }
3715 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3716 // kmp_task_t *tt);
3717 llvm::Function *TaskEntry = emitProxyTaskFunction(
3718 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3719 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3720 TaskPrivatesMap);
3721
3722 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3723 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3724 // kmp_routine_entry_t *task_entry);
3725 // Task flags. Format is taken from
3726 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3727 // description of kmp_tasking_flags struct.
3728 enum {
3729 TiedFlag = 0x1,
3730 FinalFlag = 0x2,
3731 DestructorsFlag = 0x8,
3732 PriorityFlag = 0x20,
3733 DetachableFlag = 0x40,
3734 };
3735 unsigned Flags = Data.Tied ? TiedFlag : 0;
3736 bool NeedsCleanup = false;
3737 if (!Privates.empty()) {
3738 NeedsCleanup =
3739 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3740 if (NeedsCleanup)
3741 Flags = Flags | DestructorsFlag;
3742 }
3743 if (Data.Priority.getInt())
3744 Flags = Flags | PriorityFlag;
3745 if (D.hasClausesOfKind<OMPDetachClause>())
3746 Flags = Flags | DetachableFlag;
3747 llvm::Value *TaskFlags =
3748 Data.Final.getPointer()
3749 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3750 CGF.Builder.getInt32(FinalFlag),
3751 CGF.Builder.getInt32(/*C=*/0))
3752 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3753 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3754 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3755 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3756 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3757 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3758 TaskEntry, KmpRoutineEntryPtrTy)};
3759 llvm::Value *NewTask;
3760 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3761 // Check if we have any device clause associated with the directive.
3762 const Expr *Device = nullptr;
3763 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3764 Device = C->getDevice();
3765 // Emit device ID if any, otherwise use the default value.
3766 llvm::Value *DeviceID;
3767 if (Device)
3768 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3769 CGF.Int64Ty, /*isSigned=*/true);
3770 else
3771 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3772 AllocArgs.push_back(DeviceID);
3773 NewTask = CGF.EmitRuntimeCall(
3774 OMPBuilder.getOrCreateRuntimeFunction(
3775 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3776 AllocArgs);
3777 } else {
3778 NewTask =
3779 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3780 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3781 AllocArgs);
3782 }
3783 // Emit detach clause initialization.
3784 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3785 // task_descriptor);
3786 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3787 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3788 LValue EvtLVal = CGF.EmitLValue(Evt);
3789
3790 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3791 // int gtid, kmp_task_t *task);
3792 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3793 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3794 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3795 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3796 OMPBuilder.getOrCreateRuntimeFunction(
3797 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3798 {Loc, Tid, NewTask});
3799 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3800 Evt->getExprLoc());
3801 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3802 }
3803 // Process affinity clauses.
3804 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3805 // Process list of affinity data.
3806 ASTContext &C = CGM.getContext();
3807 Address AffinitiesArray = Address::invalid();
3808 // Calculate number of elements to form the array of affinity data.
3809 llvm::Value *NumOfElements = nullptr;
3810 unsigned NumAffinities = 0;
3811 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3812 if (const Expr *Modifier = C->getModifier()) {
3813 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3814 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3815 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3816 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3817 NumOfElements =
3818 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3819 }
3820 } else {
3821 NumAffinities += C->varlist_size();
3822 }
3823 }
3824 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3825 // Field ids in the kmp_task_affinity_info record.
3826 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3827
3828 QualType KmpTaskAffinityInfoArrayTy;
3829 if (NumOfElements) {
3830 NumOfElements = CGF.Builder.CreateNUWAdd(
3831 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3832 auto *OVE = new (C) OpaqueValueExpr(
3833 Loc,
3834 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3835 VK_PRValue);
3836 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3837 RValue::get(NumOfElements));
3838 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3839 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3840 /*IndexTypeQuals=*/0);
3841 // Properly emit variable-sized array.
3842 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3843 ImplicitParamKind::Other);
3844 CGF.EmitVarDecl(*PD);
3845 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3846 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3847 /*isSigned=*/false);
3848 } else {
3849 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3850 KmpTaskAffinityInfoTy,
3851 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3852 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3853 AffinitiesArray =
3854 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3855 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3856 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3857 /*isSigned=*/false);
3858 }
3859
3860 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3861 // Fill array by elements without iterators.
3862 unsigned Pos = 0;
3863 bool HasIterator = false;
3864 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3865 if (C->getModifier()) {
3866 HasIterator = true;
3867 continue;
3868 }
3869 for (const Expr *E : C->varlist()) {
3870 llvm::Value *Addr;
3871 llvm::Value *Size;
3872 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3873 LValue Base =
3874 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3875 KmpTaskAffinityInfoTy);
3876 // affs[i].base_addr = &<Affinities[i].second>;
3877 LValue BaseAddrLVal = CGF.EmitLValueForField(
3878 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3879 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3880 BaseAddrLVal);
3881 // affs[i].len = sizeof(<Affinities[i].second>);
3882 LValue LenLVal = CGF.EmitLValueForField(
3883 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3884 CGF.EmitStoreOfScalar(Size, LenLVal);
3885 ++Pos;
3886 }
3887 }
3888 LValue PosLVal;
3889 if (HasIterator) {
3890 PosLVal = CGF.MakeAddrLValue(
3891 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3892 C.getSizeType());
3893 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3894 }
3895 // Process elements with iterators.
3896 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3897 const Expr *Modifier = C->getModifier();
3898 if (!Modifier)
3899 continue;
3900 OMPIteratorGeneratorScope IteratorScope(
3901 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3902 for (const Expr *E : C->varlist()) {
3903 llvm::Value *Addr;
3904 llvm::Value *Size;
3905 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3906 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3907 LValue Base =
3908 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3909 KmpTaskAffinityInfoTy);
3910 // affs[i].base_addr = &<Affinities[i].second>;
3911 LValue BaseAddrLVal = CGF.EmitLValueForField(
3912 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3913 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3914 BaseAddrLVal);
3915 // affs[i].len = sizeof(<Affinities[i].second>);
3916 LValue LenLVal = CGF.EmitLValueForField(
3917 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3918 CGF.EmitStoreOfScalar(Size, LenLVal);
3919 Idx = CGF.Builder.CreateNUWAdd(
3920 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3921 CGF.EmitStoreOfScalar(Idx, PosLVal);
3922 }
3923 }
3924 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3925 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3926 // naffins, kmp_task_affinity_info_t *affin_list);
3927 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3928 llvm::Value *GTid = getThreadID(CGF, Loc);
3929 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3930 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3931 // FIXME: Emit the function and ignore its result for now unless the
3932 // runtime function is properly implemented.
3933 (void)CGF.EmitRuntimeCall(
3934 OMPBuilder.getOrCreateRuntimeFunction(
3935 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3936 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3937 }
3938 llvm::Value *NewTaskNewTaskTTy =
3939 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3940 NewTask, KmpTaskTWithPrivatesPtrTy);
3941 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3942 KmpTaskTWithPrivatesQTy);
3943 LValue TDBase =
3944 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3945 // Fill the data in the resulting kmp_task_t record.
3946 // Copy shareds if there are any.
3947 Address KmpTaskSharedsPtr = Address::invalid();
3948 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
3949 KmpTaskSharedsPtr = Address(
3950 CGF.EmitLoadOfScalar(
3951 CGF.EmitLValueForField(
3952 TDBase,
3953 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3954 Loc),
3955 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3956 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3957 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3958 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3959 }
3960 // Emit initial values for private copies (if any).
3961 TaskResultTy Result;
3962 if (!Privates.empty()) {
3963 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3964 SharedsTy, SharedsPtrTy, Data, Privates,
3965 /*ForDup=*/false);
3966 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3967 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3968 Result.TaskDupFn = emitTaskDupFunction(
3969 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3970 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3971 /*WithLastIter=*/!Data.LastprivateVars.empty());
3972 }
3973 }
3974 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3975 enum { Priority = 0, Destructors = 1 };
3976 // Provide pointer to function with destructors for privates.
3977 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3978 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
3979 assert(KmpCmplrdataUD->isUnion());
3980 if (NeedsCleanup) {
3981 llvm::Value *DestructorFn = emitDestructorsFunction(
3982 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3983 KmpTaskTWithPrivatesQTy);
3984 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3985 LValue DestructorsLV = CGF.EmitLValueForField(
3986 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3987 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3988 DestructorFn, KmpRoutineEntryPtrTy),
3989 DestructorsLV);
3990 }
3991 // Set priority.
3992 if (Data.Priority.getInt()) {
3993 LValue Data2LV = CGF.EmitLValueForField(
3994 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3995 LValue PriorityLV = CGF.EmitLValueForField(
3996 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3997 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3998 }
3999 Result.NewTask = NewTask;
4000 Result.TaskEntry = TaskEntry;
4001 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4002 Result.TDBase = TDBase;
4003 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4004 return Result;
4005}
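// Putting the pieces together for a plain '#pragma omp task', the setup
// emitted by emitTaskInit() is conceptually (a sketch, not the literal IR):
// \code
// kmp_task_t *t = __kmpc_omp_task_alloc(loc, gtid, flags,
//                                       sizeof(kmp_task_t_with_privates),
//                                       sizeof(shareds), &.omp_task_entry.);
// memcpy(t->shareds, &captured_shareds, sizeof(shareds)); // if any
// // private copies in ((kmp_task_t_with_privates *)t)->privates are then
// // initialized; destructor/priority data land in the kmp_cmplrdata_t
// // fields filled right above.
// \endcode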
4006
4007/// Translates internal dependency kind into the runtime kind.
4008static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4009 RTLDependenceKindTy DepKind;
4010 switch (K) {
4011 case OMPC_DEPEND_in:
4012 DepKind = RTLDependenceKindTy::DepIn;
4013 break;
4014 // Out and InOut dependencies must use the same code.
4015 case OMPC_DEPEND_out:
4016 case OMPC_DEPEND_inout:
4017 DepKind = RTLDependenceKindTy::DepInOut;
4018 break;
4019 case OMPC_DEPEND_mutexinoutset:
4020 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4021 break;
4022 case OMPC_DEPEND_inoutset:
4023 DepKind = RTLDependenceKindTy::DepInOutSet;
4024 break;
4025 case OMPC_DEPEND_outallmemory:
4026 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4027 break;
4028 case OMPC_DEPEND_source:
4029 case OMPC_DEPEND_sink:
4030 case OMPC_DEPEND_depobj:
4031 case OMPC_DEPEND_inoutallmemory:
4032 case OMPC_DEPEND_unknown:
4033 llvm_unreachable("Unknown task dependence type");
4034 }
4035 return DepKind;
4036}
4037
4038/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4039static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4040 QualType &FlagsTy) {
4041 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4042 if (KmpDependInfoTy.isNull()) {
4043 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4044 KmpDependInfoRD->startDefinition();
4045 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4046 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4047 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4048 KmpDependInfoRD->completeDefinition();
4049 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4050 }
4051}
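// In C terms the dependence descriptor built above is roughly
// \code
// struct kmp_depend_info {
//   intptr_t base_addr;
//   size_t   len;
//   uint8_t  flags;  // sized to match 'bool'; holds RTLDependenceKindTy
// };
// \endcode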
4052
4053std::pair<llvm::Value *, LValue>
4054CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4055 SourceLocation Loc) {
4056 ASTContext &C = CGM.getContext();
4057 QualType FlagsTy;
4058 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4059 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4060 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4061 LValue Base = CGF.EmitLoadOfPointerLValue(
4062 DepobjLVal.getAddress().withElementType(
4063 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4064 KmpDependInfoPtrTy->castAs<PointerType>());
4065 Address DepObjAddr = CGF.Builder.CreateGEP(
4066 CGF, Base.getAddress(),
4067 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4068 LValue NumDepsBase = CGF.MakeAddrLValue(
4069 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4070 // NumDeps = deps[i].base_addr;
4071 LValue BaseAddrLVal = CGF.EmitLValueForField(
4072 NumDepsBase,
4073 *std::next(KmpDependInfoRD->field_begin(),
4074 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4075 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4076 return std::make_pair(NumDeps, Base);
4077}
4078
4079static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4080 llvm::PointerUnion<unsigned *, LValue *> Pos,
4081 const OMPTaskDataTy::DependData &Data,
4082 Address DependenciesArray) {
4083 CodeGenModule &CGM = CGF.CGM;
4084 ASTContext &C = CGM.getContext();
4085 QualType FlagsTy;
4086 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4087 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4088 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4089
4090 OMPIteratorGeneratorScope IteratorScope(
4091 CGF, cast_or_null<OMPIteratorExpr>(
4092 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4093 : nullptr));
4094 for (const Expr *E : Data.DepExprs) {
4095 llvm::Value *Addr;
4096 llvm::Value *Size;
4097
4098 // The expression will be a nullptr in the 'omp_all_memory' case.
4099 if (E) {
4100 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4101 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4102 } else {
4103 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4104 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4105 }
4106 LValue Base;
4107 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4108 Base = CGF.MakeAddrLValue(
4109 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4110 } else {
4111 assert(E && "Expected a non-null expression");
4112 LValue &PosLVal = *cast<LValue *>(Pos);
4113 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4114 Base = CGF.MakeAddrLValue(
4115 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4116 }
4117 // deps[i].base_addr = &<Dependencies[i].second>;
4118 LValue BaseAddrLVal = CGF.EmitLValueForField(
4119 Base,
4120 *std::next(KmpDependInfoRD->field_begin(),
4121 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4122 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4123 // deps[i].len = sizeof(<Dependencies[i].second>);
4124 LValue LenLVal = CGF.EmitLValueForField(
4125 Base, *std::next(KmpDependInfoRD->field_begin(),
4126 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4127 CGF.EmitStoreOfScalar(Size, LenLVal);
4128 // deps[i].flags = <Dependencies[i].first>;
4129 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4130 LValue FlagsLVal = CGF.EmitLValueForField(
4131 Base,
4132 *std::next(KmpDependInfoRD->field_begin(),
4133 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4134 CGF.EmitStoreOfScalar(
4135 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4136 FlagsLVal);
4137 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4138 ++(*P);
4139 } else {
4140 LValue &PosLVal = *cast<LValue *>(Pos);
4141 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4142 Idx = CGF.Builder.CreateNUWAdd(Idx,
4143 llvm::ConstantInt::get(Idx->getType(), 1));
4144 CGF.EmitStoreOfScalar(Idx, PosLVal);
4145 }
4146 }
4147}
4148
4149static SmallVector<llvm::Value *, 4>
4150emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4151 const OMPTaskDataTy::DependData &Data) {
4152 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4153 "Expected depobj dependency kind.");
4154 SmallVector<llvm::Value *, 4> Sizes;
4155 SmallVector<LValue, 4> SizeLVals;
4156 ASTContext &C = CGF.getContext();
4157 {
4158 OMPIteratorGeneratorScope IteratorScope(
4159 CGF, cast_or_null<OMPIteratorExpr>(
4160 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4161 : nullptr));
4162 for (const Expr *E : Data.DepExprs) {
4163 llvm::Value *NumDeps;
4164 LValue Base;
4165 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4166 std::tie(NumDeps, Base) =
4167 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4168 LValue NumLVal = CGF.MakeAddrLValue(
4169 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4170 C.getUIntPtrType());
4171 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4172 NumLVal.getAddress());
4173 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4174 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4175 CGF.EmitStoreOfScalar(Add, NumLVal);
4176 SizeLVals.push_back(NumLVal);
4177 }
4178 }
4179 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4180 llvm::Value *Size =
4181 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4182 Sizes.push_back(Size);
4183 }
4184 return Sizes;
4185}
4186
4187static void emitDepobjElements(CodeGenFunction &CGF,
4188 QualType &KmpDependInfoTy,
4189 LValue PosLVal,
4190 const OMPTaskDataTy::DependData &Data,
4191 Address DependenciesArray) {
4192 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4193 "Expected depobj dependency kind.");
4194 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4195 {
4196 OMPIteratorGeneratorScope IteratorScope(
4197 CGF, cast_or_null<OMPIteratorExpr>(
4198 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4199 : nullptr));
4200 for (const Expr *E : Data.DepExprs) {
4201 llvm::Value *NumDeps;
4202 LValue Base;
4203 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4204 std::tie(NumDeps, Base) =
4205 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4206
4207 // memcpy dependency data.
4208 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4209 ElSize,
4210 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4211 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4212 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4213 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4214
4215 // Increase pos.
4216 // pos += size;
4217 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4218 CGF.EmitStoreOfScalar(Add, PosLVal);
4219 }
4220 }
4221}
4222
4223std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4224 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4225 SourceLocation Loc) {
4226 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4227 return D.DepExprs.empty();
4228 }))
4229 return std::make_pair(nullptr, Address::invalid());
4230 // Process list of dependencies.
4231 ASTContext &C = CGM.getContext();
4232 Address DependenciesArray = Address::invalid();
4233 llvm::Value *NumOfElements = nullptr;
4234 unsigned NumDependencies = std::accumulate(
4235 Dependencies.begin(), Dependencies.end(), 0,
4236 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4237 return D.DepKind == OMPC_DEPEND_depobj
4238 ? V
4239 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4240 });
4241 QualType FlagsTy;
4242 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4243 bool HasDepobjDeps = false;
4244 bool HasRegularWithIterators = false;
4245 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4246 llvm::Value *NumOfRegularWithIterators =
4247 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4248 // Calculate number of depobj dependencies and regular deps with the
4249 // iterators.
4250 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4251 if (D.DepKind == OMPC_DEPEND_depobj) {
4252 SmallVector<llvm::Value *, 4> Sizes =
4253 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4254 for (llvm::Value *Size : Sizes) {
4255 NumOfDepobjElements =
4256 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4257 }
4258 HasDepobjDeps = true;
4259 continue;
4260 }
4261 // Include number of iterations, if any.
4262
4263 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4264 llvm::Value *ClauseIteratorSpace =
4265 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4266 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4267 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4268 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4269 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4270 }
4271 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4272 ClauseIteratorSpace,
4273 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4274 NumOfRegularWithIterators =
4275 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4276 HasRegularWithIterators = true;
4277 continue;
4278 }
4279 }
4280
4281 QualType KmpDependInfoArrayTy;
4282 if (HasDepobjDeps || HasRegularWithIterators) {
4283 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4284 /*isSigned=*/false);
4285 if (HasDepobjDeps) {
4286 NumOfElements =
4287 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4288 }
4289 if (HasRegularWithIterators) {
4290 NumOfElements =
4291 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4292 }
4293 auto *OVE = new (C) OpaqueValueExpr(
4294 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4295 VK_PRValue);
4296 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4297 RValue::get(NumOfElements));
4298 KmpDependInfoArrayTy =
4299 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4300 /*IndexTypeQuals=*/0);
4301 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4302 // Properly emit variable-sized array.
4303 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4304 ImplicitParamKind::Other);
4305 CGF.EmitVarDecl(*PD);
4306 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4307 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4308 /*isSigned=*/false);
4309 } else {
4310 KmpDependInfoArrayTy = C.getConstantArrayType(
4311 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4312 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4313 DependenciesArray =
4314 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4315 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4316 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4317 /*isSigned=*/false);
4318 }
4319 unsigned Pos = 0;
4320 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4321 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4322 continue;
4323 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4324 }
4325 // Copy regular dependencies with iterators.
4326 LValue PosLVal = CGF.MakeAddrLValue(
4327 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4328 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4329 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4330 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4331 continue;
4332 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4333 }
4334 // Copy final depobj arrays without iterators.
4335 if (HasDepobjDeps) {
4336 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4337 if (Dep.DepKind != OMPC_DEPEND_depobj)
4338 continue;
4339 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4340 }
4341 }
4342 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4343 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4344 return std::make_pair(NumOfElements, DependenciesArray);
4345}
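// Example (hypothetical 'a' and 'b'): for 'depend(in: a) depend(out: b)'
// the code above materializes, conceptually,
// \code
// kmp_depend_info deps[2] = {
//     {(intptr_t)&a, sizeof(a), DepIn},
//     {(intptr_t)&b, sizeof(b), DepInOut}, // 'out' lowers to in/out
// };
// // NumOfElements == 2; both are handed to __kmpc_omp_task_with_deps().
// \endcode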
4346
4347Address CGOpenMPRuntime::emitDepobjDependClause(
4348 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4349 SourceLocation Loc) {
4350 if (Dependencies.DepExprs.empty())
4351 return Address::invalid();
4352 // Process list of dependencies.
4353 ASTContext &C = CGM.getContext();
4354 Address DependenciesArray = Address::invalid();
4355 unsigned NumDependencies = Dependencies.DepExprs.size();
4356 QualType FlagsTy;
4357 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4358 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4359
4360 llvm::Value *Size;
4361 // Define type kmp_depend_info[<Dependencies.size()>];
4362 // For depobj reserve one extra element to store the number of elements.
4363 // It is required to handle depobj(x) update(in) construct.
4364 // kmp_depend_info[<Dependencies.size()>] deps;
4365 llvm::Value *NumDepsVal;
4366 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4367 if (const auto *IE =
4368 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4369 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4370 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4371 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4372 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4373 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4374 }
4375 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4376 NumDepsVal);
4377 CharUnits SizeInBytes =
4378 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4379 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4380 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4381 NumDepsVal =
4382 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4383 } else {
4384 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4385 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4386 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4387 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4388 Size = CGM.getSize(Sz.alignTo(Align));
4389 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4390 }
4391 // Need to allocate the array in dynamic memory.
4392 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4393 // Use default allocator.
4394 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4395 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4396
4397 llvm::Value *Addr =
4398 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4399 CGM.getModule(), OMPRTL___kmpc_alloc),
4400 Args, ".dep.arr.addr");
4401 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4402 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4403 Addr, CGF.Builder.getPtrTy(0));
4404 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4405 // Write number of elements in the first element of array for depobj.
4406 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4407 // deps[i].base_addr = NumDependencies;
4408 LValue BaseAddrLVal = CGF.EmitLValueForField(
4409 Base,
4410 *std::next(KmpDependInfoRD->field_begin(),
4411 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4412 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4413 llvm::PointerUnion<unsigned *, LValue *> Pos;
4414 unsigned Idx = 1;
4415 LValue PosLVal;
4416 if (Dependencies.IteratorExpr) {
4417 PosLVal = CGF.MakeAddrLValue(
4418 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4419 C.getSizeType());
4420 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4421 /*IsInit=*/true);
4422 Pos = &PosLVal;
4423 } else {
4424 Pos = &Idx;
4425 }
4426 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4427 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4428 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4429 CGF.Int8Ty);
4430 return DependenciesArray;
4431}
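// Layout note: the array allocated above has one extra leading element;
// deps[0].base_addr stores the dependency count and the returned address
// points at deps[1], so depobj update/destroy can recover the count by
// stepping back one element (see getDepobjElements()).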
4432
4433void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4434 SourceLocation Loc) {
4435 ASTContext &C = CGM.getContext();
4436 QualType FlagsTy;
4437 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4438 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4439 C.VoidPtrTy.castAs<PointerType>());
4440 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4441 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4442 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4443 CGF.ConvertTypeForMem(KmpDependInfoTy));
4444 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4445 Addr.getElementType(), Addr.emitRawPointer(CGF),
4446 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4447 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4448 CGF.VoidPtrTy);
4449 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4450 // Use default allocator.
4451 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4452 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4453
4454 // __kmpc_free(gtid, addr, nullptr);
4455 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4456 CGM.getModule(), OMPRTL___kmpc_free),
4457 Args);
4458}
4459
4460void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4461 OpenMPDependClauseKind NewDepKind,
4462 SourceLocation Loc) {
4463 ASTContext &C = CGM.getContext();
4464 QualType FlagsTy;
4465 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4466 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4467 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4468 llvm::Value *NumDeps;
4469 LValue Base;
4470 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4471
4472 Address Begin = Base.getAddress();
4473 // Cast from pointer to array type to pointer to single element.
4474 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4475 Begin.emitRawPointer(CGF), NumDeps);
4476 // The basic structure here is a while-do loop.
4477 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4478 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4479 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4480 CGF.EmitBlock(BodyBB);
4481 llvm::PHINode *ElementPHI =
4482 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4483 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4484 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4485 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4486 Base.getTBAAInfo());
4487 // deps[i].flags = NewDepKind;
4488 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4489 LValue FlagsLVal = CGF.EmitLValueForField(
4490 Base, *std::next(KmpDependInfoRD->field_begin(),
4491 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4492 CGF.EmitStoreOfScalar(
4493 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4494 FlagsLVal);
4495
4496 // Shift the address forward by one element.
4497 llvm::Value *ElementNext =
4498 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4499 .emitRawPointer(CGF);
4500 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4501 llvm::Value *IsEmpty =
4502 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4503 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4504 // Done.
4505 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4506}
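The flags rewrite above implements the 'update' form of the construct; a sketch:

#include <omp.h>

void demote(omp_depend_t &o) {
  // Walks all NumDeps records behind 'o' and stores the new kind into
  // deps[i].flags in place, with no reallocation -- the loop emitted above.
  #pragma omp depobj(o) update(in)
}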
4507
4508 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4509 const OMPExecutableDirective &D,
4510 llvm::Function *TaskFunction,
4511 QualType SharedsTy, Address Shareds,
4512 const Expr *IfCond,
4513 const OMPTaskDataTy &Data) {
4514 if (!CGF.HaveInsertPoint())
4515 return;
4516
4517 TaskResultTy Result =
4518 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4519 llvm::Value *NewTask = Result.NewTask;
4520 llvm::Function *TaskEntry = Result.TaskEntry;
4521 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4522 LValue TDBase = Result.TDBase;
4523 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4524 // Process list of dependences.
4525 Address DependenciesArray = Address::invalid();
4526 llvm::Value *NumOfElements;
4527 std::tie(NumOfElements, DependenciesArray) =
4528 emitDependClause(CGF, Data.Dependences, Loc);
4529
4530 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4531 // libcall.
4532 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4533 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4534 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4535 // list is not empty
4536 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4537 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4538 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4539 llvm::Value *DepTaskArgs[7];
4540 if (!Data.Dependences.empty()) {
4541 DepTaskArgs[0] = UpLoc;
4542 DepTaskArgs[1] = ThreadID;
4543 DepTaskArgs[2] = NewTask;
4544 DepTaskArgs[3] = NumOfElements;
4545 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4546 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4547 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4548 }
4549 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4550 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4551 if (!Data.Tied) {
4552 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4553 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4554 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4555 }
4556 if (!Data.Dependences.empty()) {
4557 CGF.EmitRuntimeCall(
4558 OMPBuilder.getOrCreateRuntimeFunction(
4559 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4560 DepTaskArgs);
4561 } else {
4562 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4563 CGM.getModule(), OMPRTL___kmpc_omp_task),
4564 TaskArgs);
4565 }
4566 // Check if the parent region is untied and build a return for the untied task.
4567 if (auto *Region =
4568 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4569 Region->emitUntiedSwitch(CGF);
4570 };
4571
4572 llvm::Value *DepWaitTaskArgs[7];
4573 if (!Data.Dependences.empty()) {
4574 DepWaitTaskArgs[0] = UpLoc;
4575 DepWaitTaskArgs[1] = ThreadID;
4576 DepWaitTaskArgs[2] = NumOfElements;
4577 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4578 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4579 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4580 DepWaitTaskArgs[6] =
4581 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4582 }
4583 auto &M = CGM.getModule();
4584 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4585 TaskEntry, &Data, &DepWaitTaskArgs,
4586 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4587 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4588 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4589 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4590 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4591 // is specified.
4592 if (!Data.Dependences.empty())
4593 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4594 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4595 DepWaitTaskArgs);
4596 // Call proxy_task_entry(gtid, new_task);
4597 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4598 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4599 Action.Enter(CGF);
4600 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4601 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4602 OutlinedFnArgs);
4603 };
4604
4605 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4606 // kmp_task_t *new_task);
4607 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4608 // kmp_task_t *new_task);
4609 RegionCodeGenTy RCG(CodeGen);
4610 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4611 M, OMPRTL___kmpc_omp_task_begin_if0),
4612 TaskArgs,
4613 OMPBuilder.getOrCreateRuntimeFunction(
4614 M, OMPRTL___kmpc_omp_task_complete_if0),
4615 TaskArgs);
4616 RCG.setAction(Action);
4617 RCG(CGF);
4618 };
4619
4620 if (IfCond) {
4621 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4622 } else {
4623 RegionCodeGenTy ThenRCG(ThenCodeGen);
4624 ThenRCG(CGF);
4625 }
4626}
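A sketch of the two paths selected above. When the if clause is false, the task runs immediately between __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0, after waiting on dependences via __kmpc_omp_taskwait_deps_51; otherwise it is enqueued, through __kmpc_omp_task_with_deps when a depend clause is present (names illustrative):

#include <omp.h>

void produce(int *a, int n, bool heavy) {
  #pragma omp parallel
  #pragma omp single
  for (int i = 0; i < n; ++i) {
    // heavy == true  -> ThenCodeGen: __kmpc_omp_task_with_deps(...)
    // heavy == false -> ElseCodeGen: wait on the dependences, then call the
    //                   proxy task entry between the if0 begin/complete pair
    #pragma omp task if(heavy) depend(out: a[i])
    a[i] = i;
  }
}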
4627
4628 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4629 const OMPLoopDirective &D,
4630 llvm::Function *TaskFunction,
4631 QualType SharedsTy, Address Shareds,
4632 const Expr *IfCond,
4633 const OMPTaskDataTy &Data) {
4634 if (!CGF.HaveInsertPoint())
4635 return;
4636 TaskResultTy Result =
4637 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4638 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4639 // libcall.
4640 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4641 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4642 // sched, kmp_uint64 grainsize, void *task_dup);
4643 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4644 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4645 llvm::Value *IfVal;
4646 if (IfCond) {
4647 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4648 /*isSigned=*/true);
4649 } else {
4650 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4651 }
4652
4653 LValue LBLVal = CGF.EmitLValueForField(
4654 Result.TDBase,
4655 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4656 const auto *LBVar =
4657 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4658 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4659 /*IsInitializer=*/true);
4660 LValue UBLVal = CGF.EmitLValueForField(
4661 Result.TDBase,
4662 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4663 const auto *UBVar =
4664 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4665 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4666 /*IsInitializer=*/true);
4667 LValue StLVal = CGF.EmitLValueForField(
4668 Result.TDBase,
4669 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4670 const auto *StVar =
4671 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4672 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4673 /*IsInitializer=*/true);
4674 // Store reductions address.
4675 LValue RedLVal = CGF.EmitLValueForField(
4676 Result.TDBase,
4677 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4678 if (Data.Reductions) {
4679 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4680 } else {
4681 CGF.EmitNullInitialization(RedLVal.getAddress(),
4682 CGF.getContext().VoidPtrTy);
4683 }
4684 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4685 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4686 UpLoc,
4687 ThreadID,
4688 Result.NewTask,
4689 IfVal,
4690 LBLVal.getPointer(CGF),
4691 UBLVal.getPointer(CGF),
4692 CGF.EmitLoadOfScalar(StLVal, Loc),
4693 llvm::ConstantInt::getSigned(
4694 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4695 llvm::ConstantInt::getSigned(
4696 CGF.IntTy, Data.Schedule.getPointer()
4697 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4698 : NoSchedule),
4699 Data.Schedule.getPointer()
4700 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4701 /*isSigned=*/false)
4702 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4703 if (Data.HasModifier)
4704 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4705
4706 TaskArgs.push_back(Result.TaskDupFn
4707 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4708 Result.TaskDupFn, CGF.VoidPtrTy)
4709 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4710 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4711 CGM.getModule(), Data.HasModifier
4712 ? OMPRTL___kmpc_taskloop_5
4713 : OMPRTL___kmpc_taskloop),
4714 TaskArgs);
4715}
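For reference, the sched/grainsize pair computed from Data.Schedule above corresponds to the taskloop modifiers as follows (a sketch):

#include <omp.h>

void scale(float *y, float a, int n) {
  #pragma omp taskloop grainsize(64) // sched = Grainsize (1), value 64
  for (int i = 0; i < n; ++i)
    y[i] *= a;
  #pragma omp taskloop num_tasks(8)  // sched = NumTasks (2), value 8
  for (int i = 0; i < n; ++i)
    y[i] *= a;
  #pragma omp taskloop               // sched = NoSchedule (0), value 0
  for (int i = 0; i < n; ++i)
    y[i] *= a;
}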
4716
4717/// Emit reduction operation for each element of array (required for
4718/// array sections) LHS op = RHS.
4719/// \param Type Type of array.
4720/// \param LHSVar Variable on the left side of the reduction operation
4721/// (references element of array in original variable).
4722/// \param RHSVar Variable on the right side of the reduction operation
4723/// (references element of array in original variable).
4724/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4725/// RHSVar.
4726 static void EmitOMPAggregateReduction(
4727 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4728 const VarDecl *RHSVar,
4729 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4730 const Expr *, const Expr *)> &RedOpGen,
4731 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4732 const Expr *UpExpr = nullptr) {
4733 // Perform element-by-element initialization.
4734 QualType ElementTy;
4735 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4736 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4737
4738 // Drill down to the base element type on both arrays.
4739 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4740 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4741
4742 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4743 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4744 // Cast from pointer to array type to pointer to single element.
4745 llvm::Value *LHSEnd =
4746 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4747 // The basic structure here is a while-do loop.
4748 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4749 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4750 llvm::Value *IsEmpty =
4751 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4752 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4753
4754 // Enter the loop body, making that address the current address.
4755 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4756 CGF.EmitBlock(BodyBB);
4757
4758 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4759
4760 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4761 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4762 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4763 Address RHSElementCurrent(
4764 RHSElementPHI, RHSAddr.getElementType(),
4765 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4766
4767 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4768 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4769 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4770 Address LHSElementCurrent(
4771 LHSElementPHI, LHSAddr.getElementType(),
4772 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4773
4774 // Emit copy.
4775 CodeGenFunction::OMPPrivateScope Scope(CGF);
4776 Scope.addPrivate(LHSVar, LHSElementCurrent);
4777 Scope.addPrivate(RHSVar, RHSElementCurrent);
4778 Scope.Privatize();
4779 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4780 Scope.ForceCleanup();
4781
4782 // Shift the address forward by one element.
4783 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4784 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4785 "omp.arraycpy.dest.element");
4786 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4787 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4788 "omp.arraycpy.src.element");
4789 // Check whether we've reached the end.
4790 llvm::Value *Done =
4791 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4792 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4793 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4794 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4795
4796 // Done.
4797 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4798}
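The basic blocks constructed above amount to this element-wise combine, shown as a standalone sketch with illustrative names:

template <typename T, typename Op>
void aggregateReduce(T *lhs, const T *rhs, unsigned n, Op redOp) {
  T *end = lhs + n;        // LHSEnd: one past the last element
  if (lhs == end)
    return;                // omp.arraycpy.isempty
  do {
    redOp(*lhs, *rhs);     // RedOpGen over the privatized elements
    ++lhs;                 // omp.arraycpy.dest.element
    ++rhs;                 // omp.arraycpy.src.element
  } while (lhs != end);    // omp.arraycpy.done
}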
4799
4800/// Emit reduction combiner. If the combiner is a simple expression emit it as
4801/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4802/// UDR combiner function.
4803 static void emitReductionCombiner(CodeGenFunction &CGF,
4804 const Expr *ReductionOp) {
4805 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4806 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4807 if (const auto *DRE =
4808 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4809 if (const auto *DRD =
4810 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4811 std::pair<llvm::Function *, llvm::Function *> Reduction =
4812 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4813 RValue Func = RValue::get(Reduction.first);
4814 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4815 CGF.EmitIgnoredExpr(ReductionOp);
4816 return;
4817 }
4818 CGF.EmitIgnoredExpr(ReductionOp);
4819}
4820
4821 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4822 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4823 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4824 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4825 ASTContext &C = CGM.getContext();
4826
4827 // void reduction_func(void *LHSArg, void *RHSArg);
4828 FunctionArgList Args;
4829 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4830 ImplicitParamKind::Other);
4831 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4832 ImplicitParamKind::Other);
4833 Args.push_back(&LHSArg);
4834 Args.push_back(&RHSArg);
4835 const auto &CGFI =
4836 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4837 std::string Name = getReductionFuncName(ReducerName);
4838 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4839 llvm::GlobalValue::InternalLinkage, Name,
4840 &CGM.getModule());
4841 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4842 Fn->setDoesNotRecurse();
4843 CodeGenFunction CGF(CGM);
4844 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4845
4846 // Dst = (void*[n])(LHSArg);
4847 // Src = (void*[n])(RHSArg);
4848 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4849 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4850 CGF.Builder.getPtrTy(0)),
4851 ArgsElemType, CGF.getPointerAlign());
4852 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4853 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4854 CGF.Builder.getPtrTy(0)),
4855 ArgsElemType, CGF.getPointerAlign());
4856
4857 // ...
4858 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4859 // ...
4860 CodeGenFunction::OMPPrivateScope Scope(CGF);
4861 const auto *IPriv = Privates.begin();
4862 unsigned Idx = 0;
4863 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4864 const auto *RHSVar =
4865 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4866 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4867 const auto *LHSVar =
4868 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4869 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4870 QualType PrivTy = (*IPriv)->getType();
4871 if (PrivTy->isVariablyModifiedType()) {
4872 // Get array size and emit VLA type.
4873 ++Idx;
4874 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4875 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4876 const VariableArrayType *VLA =
4877 CGF.getContext().getAsVariableArrayType(PrivTy);
4878 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4879 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4880 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4881 CGF.EmitVariablyModifiedType(PrivTy);
4882 }
4883 }
4884 Scope.Privatize();
4885 IPriv = Privates.begin();
4886 const auto *ILHS = LHSExprs.begin();
4887 const auto *IRHS = RHSExprs.begin();
4888 for (const Expr *E : ReductionOps) {
4889 if ((*IPriv)->getType()->isArrayType()) {
4890 // Emit reduction for array section.
4891 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4892 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4893 EmitOMPAggregateReduction(
4894 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4895 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4896 emitReductionCombiner(CGF, E);
4897 });
4898 } else {
4899 // Emit reduction for array subscript or single variable.
4900 emitReductionCombiner(CGF, E);
4901 }
4902 ++IPriv;
4903 ++ILHS;
4904 ++IRHS;
4905 }
4906 Scope.ForceCleanup();
4907 CGF.FinishFunction();
4908 return Fn;
4909}
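At the C level the generated function has roughly this shape for two reduction items; types and combiners are illustrative, and lhs/rhs mirror the void *RedList[<n>] packing performed by the caller:

static void reduction_func(void *lhs[2], void *rhs[2]) {
  *(int *)lhs[0] = *(int *)lhs[0] + *(int *)rhs[0];       // RedOp<0>
  *(float *)lhs[1] = *(float *)lhs[1] * *(float *)rhs[1]; // RedOp<1>
}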
4910
4911 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4912 const Expr *ReductionOp,
4913 const Expr *PrivateRef,
4914 const DeclRefExpr *LHS,
4915 const DeclRefExpr *RHS) {
4916 if (PrivateRef->getType()->isArrayType()) {
4917 // Emit reduction for array section.
4918 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4919 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4920 EmitOMPAggregateReduction(
4921 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4922 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4923 emitReductionCombiner(CGF, ReductionOp);
4924 });
4925 } else {
4926 // Emit reduction for array subscript or single variable.
4927 emitReductionCombiner(CGF, ReductionOp);
4928 }
4929}
4930
4931static std::string generateUniqueName(CodeGenModule &CGM,
4932 llvm::StringRef Prefix, const Expr *Ref);
4933
4934 void CGOpenMPRuntime::emitPrivateReduction(
4935 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4936 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4937
4938 // Create a shared global variable (__shared_reduction_var) to accumulate the
4939 // final result.
4940 //
4941 // Call __kmpc_barrier to synchronize threads before initialization.
4942 //
4943 // The master thread (thread_id == 0) initializes __shared_reduction_var
4944 // with the identity value or initializer.
4945 //
4946 // Call __kmpc_barrier to synchronize before combining.
4947 // For each i:
4948 // - Thread enters critical section.
4949 // - Reads its private value from LHSExprs[i].
4950 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4951 // Privates[i]).
4952 // - Exits critical section.
4953 //
4954 // Call __kmpc_barrier after combining.
4955 //
4956 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4957 //
4958 // Final __kmpc_barrier to synchronize after broadcasting
4959 QualType PrivateType = Privates->getType();
4960 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4961
4962 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
4963 std::string ReductionVarNameStr;
4964 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
4965 ReductionVarNameStr =
4966 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4967 else
4968 ReductionVarNameStr = "unnamed_priv_var";
4969
4970 // Create an internal shared variable
4971 std::string SharedName =
4972 CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
4973 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4974 LLVMType, ".omp.reduction." + SharedName);
4975
4976 SharedVar->setAlignment(
4977 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4978
4979 Address SharedResult =
4980 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
4981
4982 llvm::Value *ThreadId = getThreadID(CGF, Loc);
4983 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4984 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
4985
4986 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
4987 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
4988
4989 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
4990 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
4991 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
4992
4993 CGF.EmitBlock(InitBB);
4994
4995 auto EmitSharedInit = [&]() {
4996 if (UDR) { // Check if it's a User-Defined Reduction
4997 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
4998 std::pair<llvm::Function *, llvm::Function *> FnPair =
4999 getUserDefinedReduction(UDR);
5000 llvm::Function *InitializerFn = FnPair.second;
5001 if (InitializerFn) {
5002 if (const auto *CE =
5003 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
5004 const auto *OutDRE = cast<DeclRefExpr>(
5005 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5006 ->getSubExpr());
5007 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5008
5009 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5010 LocalScope.addPrivate(OutVD, SharedResult);
5011
5012 (void)LocalScope.Privatize();
5013 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5014 CE->getCallee()->IgnoreParenImpCasts())) {
5015 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5016 CGF, OVE, RValue::get(InitializerFn));
5017 CGF.EmitIgnoredExpr(CE);
5018 } else {
5019 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5020 PrivateType.getQualifiers(),
5021 /*IsInitializer=*/true);
5022 }
5023 } else {
5024 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5025 PrivateType.getQualifiers(),
5026 /*IsInitializer=*/true);
5027 }
5028 } else {
5029 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5030 PrivateType.getQualifiers(),
5031 /*IsInitializer=*/true);
5032 }
5033 } else {
5034 // EmitNullInitialization handles default construction for C++ classes
5035 // and zeroing for scalars, which is a reasonable default.
5036 CGF.EmitNullInitialization(SharedResult, PrivateType);
5037 }
5038 return; // UDR initialization handled
5039 }
5040 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5041 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5042 if (const Expr *InitExpr = VD->getInit()) {
5043 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5044 PrivateType.getQualifiers(), true);
5045 return;
5046 }
5047 }
5048 }
5049 CGF.EmitNullInitialization(SharedResult, PrivateType);
5050 };
5051 EmitSharedInit();
5052 CGF.Builder.CreateBr(InitEndBB);
5053 CGF.EmitBlock(InitEndBB);
5054
5055 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5056 CGM.getModule(), OMPRTL___kmpc_barrier),
5057 BarrierArgs);
5058
5059 const Expr *ReductionOp = ReductionOps;
5060 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5061 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5062 LValue LHSLV = CGF.EmitLValue(Privates);
5063
5064 auto EmitCriticalReduction = [&](auto ReductionGen) {
5065 std::string CriticalName = getName({"reduction_critical"});
5066 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5067 };
5068
5069 if (CurrentUDR) {
5070 // Handle user-defined reduction.
5071 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5072 Action.Enter(CGF);
5073 std::pair<llvm::Function *, llvm::Function *> FnPair =
5074 getUserDefinedReduction(CurrentUDR);
5075 if (FnPair.first) {
5076 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5077 const auto *OutDRE = cast<DeclRefExpr>(
5078 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5079 ->getSubExpr());
5080 const auto *InDRE = cast<DeclRefExpr>(
5081 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5082 ->getSubExpr());
5083 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5084 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5085 SharedLV.getAddress());
5086 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5087 LHSLV.getAddress());
5088 (void)LocalScope.Privatize();
5089 emitReductionCombiner(CGF, ReductionOp);
5090 }
5091 }
5092 };
5093 EmitCriticalReduction(ReductionGen);
5094 } else {
5095 // Handle built-in reduction operations.
5096#ifndef NDEBUG
5097 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5098 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5099 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5100
5101 const Expr *AssignRHS = nullptr;
5102 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5103 if (BinOp->getOpcode() == BO_Assign)
5104 AssignRHS = BinOp->getRHS();
5105 } else if (const auto *OpCall =
5106 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5107 if (OpCall->getOperator() == OO_Equal)
5108 AssignRHS = OpCall->getArg(1);
5109 }
5110
5111 assert(AssignRHS &&
5112 "Private Variable Reduction : Invalid ReductionOp expression");
5113#endif
5114
5115 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5116 Action.Enter(CGF);
5117 const auto *OmpOutDRE =
5118 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5119 const auto *OmpInDRE =
5120 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5121 assert(
5122 OmpOutDRE && OmpInDRE &&
5123 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5124 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5125 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5126 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5127 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5128 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5129 (void)LocalScope.Privatize();
5130 // Emit the actual reduction operation
5131 CGF.EmitIgnoredExpr(ReductionOp);
5132 };
5133 EmitCriticalReduction(ReductionGen);
5134 }
5135
5136 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5137 CGM.getModule(), OMPRTL___kmpc_barrier),
5138 BarrierArgs);
5139
5140 // Broadcast final result
5141 bool IsAggregate = PrivateType->isAggregateType();
5142 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5143 llvm::Value *FinalResultVal = nullptr;
5144 Address FinalResultAddr = Address::invalid();
5145
5146 if (IsAggregate)
5147 FinalResultAddr = SharedResult;
5148 else
5149 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5150
5151 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5152 if (IsAggregate) {
5153 CGF.EmitAggregateCopy(TargetLHSLV,
5154 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5155 PrivateType, AggValueSlot::DoesNotOverlap, false);
5156 } else {
5157 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5158 }
5159 // Final synchronization barrier
5160 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5161 CGM.getModule(), OMPRTL___kmpc_barrier),
5162 BarrierArgs);
5163
5164 // Combiner with original list item
5165 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5166 PrePostActionTy &Action) {
5167 Action.Enter(CGF);
5168 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5169 cast<DeclRefExpr>(LHSExprs),
5170 cast<DeclRefExpr>(RHSExprs));
5171 };
5172 EmitCriticalReduction(OriginalListCombiner);
5173}
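Restated with OpenMP pragmas instead of raw __kmpc_* calls, the sequence emitted above follows this sketch ('+' stands for the clause's combiner; shared_var models the internal '.omp.reduction.' global):

#include <omp.h>

int shared_var; // models the internal shared global created above

void privateReduce(int &priv, int &orig) { // runs inside a parallel region
  if (omp_get_thread_num() == 0)
    shared_var = 0;       // master initializes (identity value or UDR init)
  #pragma omp barrier     // __kmpc_barrier
  #pragma omp critical    // "reduction_critical" region
  shared_var += priv;     // combine this thread's private value
  #pragma omp barrier     // __kmpc_barrier
  priv = shared_var;      // broadcast the result back to every thread
  #pragma omp barrier     // final __kmpc_barrier
  #pragma omp critical    // combine with the original list item
  orig += priv;
}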
5174
5175 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5176 ArrayRef<const Expr *> OrgPrivates,
5177 ArrayRef<const Expr *> OrgLHSExprs,
5178 ArrayRef<const Expr *> OrgRHSExprs,
5179 ArrayRef<const Expr *> OrgReductionOps,
5180 ReductionOptionsTy Options) {
5181 if (!CGF.HaveInsertPoint())
5182 return;
5183
5184 bool WithNowait = Options.WithNowait;
5185 bool SimpleReduction = Options.SimpleReduction;
5186
5187 // The following code should be emitted for the reduction:
5188 //
5189 // static kmp_critical_name lock = { 0 };
5190 //
5191 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5192 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5193 // ...
5194 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5195 // *(Type<n>-1*)rhs[<n>-1]);
5196 // }
5197 //
5198 // ...
5199 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5200 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5201 // RedList, reduce_func, &<lock>)) {
5202 // case 1:
5203 // ...
5204 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5205 // ...
5206 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5207 // break;
5208 // case 2:
5209 // ...
5210 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5211 // ...
5212 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5213 // break;
5214 // default:;
5215 // }
5216 //
5217 // If SimpleReduction is true, only the following code is generated:
5218 // ...
5219 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5220 // ...
5221
5222 ASTContext &C = CGM.getContext();
5223
5224 if (SimpleReduction) {
5225 CodeGenFunction::RunCleanupsScope Scope(CGF);
5226 const auto *IPriv = OrgPrivates.begin();
5227 const auto *ILHS = OrgLHSExprs.begin();
5228 const auto *IRHS = OrgRHSExprs.begin();
5229 for (const Expr *E : OrgReductionOps) {
5230 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5231 cast<DeclRefExpr>(*IRHS));
5232 ++IPriv;
5233 ++ILHS;
5234 ++IRHS;
5235 }
5236 return;
5237 }
5238
5239 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5240 // Only keep entries where the corresponding variable is not private.
5241 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5242 FilteredRHSExprs, FilteredReductionOps;
5243 for (unsigned I : llvm::seq<unsigned>(
5244 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5245 if (!Options.IsPrivateVarReduction[I]) {
5246 FilteredPrivates.emplace_back(OrgPrivates[I]);
5247 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5248 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5249 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5250 }
5251 }
5252 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5253 // processing.
5254 ArrayRef<const Expr *> Privates = FilteredPrivates;
5255 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5256 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5257 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5258
5259 // 1. Build a list of reduction variables.
5260 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5261 auto Size = RHSExprs.size();
5262 for (const Expr *E : Privates) {
5263 if (E->getType()->isVariablyModifiedType())
5264 // Reserve place for array size.
5265 ++Size;
5266 }
5267 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5268 QualType ReductionArrayTy = C.getConstantArrayType(
5269 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5270 /*IndexTypeQuals=*/0);
5271 RawAddress ReductionList =
5272 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5273 const auto *IPriv = Privates.begin();
5274 unsigned Idx = 0;
5275 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5276 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5277 CGF.Builder.CreateStore(
5278 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5279 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5280 Elem);
5281 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5282 // Store array size.
5283 ++Idx;
5284 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5285 llvm::Value *Size = CGF.Builder.CreateIntCast(
5286 CGF.getVLASize(
5287 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5288 .NumElts,
5289 CGF.SizeTy, /*isSigned=*/false);
5290 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5291 Elem);
5292 }
5293 }
5294
5295 // 2. Emit reduce_func().
5296 llvm::Function *ReductionFn = emitReductionFunction(
5297 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5298 Privates, LHSExprs, RHSExprs, ReductionOps);
5299
5300 // 3. Create static kmp_critical_name lock = { 0 };
5301 std::string Name = getName({"reduction"});
5302 llvm::Value *Lock = getCriticalRegionLock(Name);
5303
5304 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5305 // RedList, reduce_func, &<lock>);
5306 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5307 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5308 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5309 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5310 ReductionList.getPointer(), CGF.VoidPtrTy);
5311 llvm::Value *Args[] = {
5312 IdentTLoc, // ident_t *<loc>
5313 ThreadId, // i32 <gtid>
5314 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5315 ReductionArrayTySize, // size_type sizeof(RedList)
5316 RL, // void *RedList
5317 ReductionFn, // void (*) (void *, void *) <reduce_func>
5318 Lock // kmp_critical_name *&<lock>
5319 };
5320 llvm::Value *Res = CGF.EmitRuntimeCall(
5321 OMPBuilder.getOrCreateRuntimeFunction(
5322 CGM.getModule(),
5323 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5324 Args);
5325
5326 // 5. Build switch(res)
5327 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5328 llvm::SwitchInst *SwInst =
5329 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5330
5331 // 6. Build case 1:
5332 // ...
5333 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5334 // ...
5335 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5336 // break;
5337 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5338 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5339 CGF.EmitBlock(Case1BB);
5340
5341 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5342 llvm::Value *EndArgs[] = {
5343 IdentTLoc, // ident_t *<loc>
5344 ThreadId, // i32 <gtid>
5345 Lock // kmp_critical_name *&<lock>
5346 };
5347 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5348 CodeGenFunction &CGF, PrePostActionTy &Action) {
5349 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5350 const auto *IPriv = Privates.begin();
5351 const auto *ILHS = LHSExprs.begin();
5352 const auto *IRHS = RHSExprs.begin();
5353 for (const Expr *E : ReductionOps) {
5354 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5355 cast<DeclRefExpr>(*IRHS));
5356 ++IPriv;
5357 ++ILHS;
5358 ++IRHS;
5359 }
5360 };
5361 RegionCodeGenTy RCG(CodeGen);
5362 CommonActionTy Action(
5363 nullptr, {},
5364 OMPBuilder.getOrCreateRuntimeFunction(
5365 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5366 : OMPRTL___kmpc_end_reduce),
5367 EndArgs);
5368 RCG.setAction(Action);
5369 RCG(CGF);
5370
5371 CGF.EmitBranch(DefaultBB);
5372
5373 // 7. Build case 2:
5374 // ...
5375 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5376 // ...
5377 // break;
5378 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5379 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5380 CGF.EmitBlock(Case2BB);
5381
5382 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5383 CodeGenFunction &CGF, PrePostActionTy &Action) {
5384 const auto *ILHS = LHSExprs.begin();
5385 const auto *IRHS = RHSExprs.begin();
5386 const auto *IPriv = Privates.begin();
5387 for (const Expr *E : ReductionOps) {
5388 const Expr *XExpr = nullptr;
5389 const Expr *EExpr = nullptr;
5390 const Expr *UpExpr = nullptr;
5391 BinaryOperatorKind BO = BO_Comma;
5392 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5393 if (BO->getOpcode() == BO_Assign) {
5394 XExpr = BO->getLHS();
5395 UpExpr = BO->getRHS();
5396 }
5397 }
5398 // Try to emit update expression as a simple atomic.
5399 const Expr *RHSExpr = UpExpr;
5400 if (RHSExpr) {
5401 // Analyze RHS part of the whole expression.
5402 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5403 RHSExpr->IgnoreParenImpCasts())) {
5404 // If this is a conditional operator, analyze its condition for
5405 // min/max reduction operator.
5406 RHSExpr = ACO->getCond();
5407 }
5408 if (const auto *BORHS =
5409 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5410 EExpr = BORHS->getRHS();
5411 BO = BORHS->getOpcode();
5412 }
5413 }
5414 if (XExpr) {
5415 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5416 auto &&AtomicRedGen = [BO, VD,
5417 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5418 const Expr *EExpr, const Expr *UpExpr) {
5419 LValue X = CGF.EmitLValue(XExpr);
5420 RValue E;
5421 if (EExpr)
5422 E = CGF.EmitAnyExpr(EExpr);
5423 CGF.EmitOMPAtomicSimpleUpdateExpr(
5424 X, E, BO, /*IsXLHSInRHSPart=*/true,
5425 llvm::AtomicOrdering::Monotonic, Loc,
5426 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5427 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5428 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5429 CGF.emitOMPSimpleStore(
5430 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5431 VD->getType().getNonReferenceType(), Loc);
5432 PrivateScope.addPrivate(VD, LHSTemp);
5433 (void)PrivateScope.Privatize();
5434 return CGF.EmitAnyExpr(UpExpr);
5435 });
5436 };
5437 if ((*IPriv)->getType()->isArrayType()) {
5438 // Emit atomic reduction for array section.
5439 const auto *RHSVar =
5440 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5441 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5442 AtomicRedGen, XExpr, EExpr, UpExpr);
5443 } else {
5444 // Emit atomic reduction for array subscript or single variable.
5445 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5446 }
5447 } else {
5448 // Emit as a critical region.
5449 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5450 const Expr *, const Expr *) {
5451 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5452 std::string Name = RT.getName({"atomic_reduction"});
5453 RT.emitCriticalRegion(
5454 CGF, Name,
5455 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5456 Action.Enter(CGF);
5457 emitReductionCombiner(CGF, E);
5458 },
5459 Loc);
5460 };
5461 if ((*IPriv)->getType()->isArrayType()) {
5462 const auto *LHSVar =
5463 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5464 const auto *RHSVar =
5465 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5466 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5467 CritRedGen);
5468 } else {
5469 CritRedGen(CGF, nullptr, nullptr, nullptr);
5470 }
5471 }
5472 ++ILHS;
5473 ++IRHS;
5474 ++IPriv;
5475 }
5476 };
5477 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5478 if (!WithNowait) {
5479 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5480 llvm::Value *EndArgs[] = {
5481 IdentTLoc, // ident_t *<loc>
5482 ThreadId, // i32 <gtid>
5483 Lock // kmp_critical_name *&<lock>
5484 };
5485 CommonActionTy Action(nullptr, {},
5486 OMPBuilder.getOrCreateRuntimeFunction(
5487 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5488 EndArgs);
5489 AtomicRCG.setAction(Action);
5490 AtomicRCG(CGF);
5491 } else {
5492 AtomicRCG(CGF);
5493 }
5494
5495 CGF.EmitBranch(DefaultBB);
5496 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5497 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5498 "PrivateVarReduction: Privates size mismatch");
5499 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5500 "PrivateVarReduction: ReductionOps size mismatch");
5501 for (unsigned I : llvm::seq<unsigned>(
5502 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5503 if (Options.IsPrivateVarReduction[I])
5504 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5505 OrgRHSExprs[I], OrgReductionOps[I]);
5506 }
5507}
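A minimal source-level example that reaches the switch above (case 1 combines under the lock via reduce_func, case 2 combines atomically per item):

#include <omp.h>

int sumArray(const int *a, int n) {
  int sum = 0;
  // RedList packs the address of sum's private copy; __kmpc_reduce{_nowait}
  // selects the combining strategy, finished by __kmpc_end_reduce{_nowait}.
  #pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < n; ++i)
    sum += a[i];
  return sum;
}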
5508
5509/// Generates unique name for artificial threadprivate variables.
5510/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5511static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5512 const Expr *Ref) {
5513 SmallString<256> Buffer;
5514 llvm::raw_svector_ostream Out(Buffer);
5515 const clang::DeclRefExpr *DE;
5516 const VarDecl *D = ::getBaseDecl(Ref, DE);
5517 if (!D)
5518 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5519 D = D->getCanonicalDecl();
5520 std::string Name = CGM.getOpenMPRuntime().getName(
5521 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5522 Out << Prefix << Name << "_"
5523 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5524 return std::string(Out.str());
5525}
5526
5527/// Emits reduction initializer function:
5528/// \code
5529/// void @.red_init(void* %arg, void* %orig) {
5530/// %0 = bitcast void* %arg to <type>*
5531/// store <type> <init>, <type>* %0
5532/// ret void
5533/// }
5534/// \endcode
5535static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5536 SourceLocation Loc,
5537 ReductionCodeGen &RCG, unsigned N) {
5538 ASTContext &C = CGM.getContext();
5539 QualType VoidPtrTy = C.VoidPtrTy;
5540 VoidPtrTy.addRestrict();
5541 FunctionArgList Args;
5542 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5543 ImplicitParamKind::Other);
5544 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5545 ImplicitParamKind::Other);
5546 Args.emplace_back(&Param);
5547 Args.emplace_back(&ParamOrig);
5548 const auto &FnInfo =
5549 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5550 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5551 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5552 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5553 Name, &CGM.getModule());
5554 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5555 Fn->setDoesNotRecurse();
5556 CodeGenFunction CGF(CGM);
5557 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5558 QualType PrivateType = RCG.getPrivateType(N);
5559 Address PrivateAddr = CGF.EmitLoadOfPointer(
5560 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5561 C.getPointerType(PrivateType)->castAs<PointerType>());
5562 llvm::Value *Size = nullptr;
5563 // If the size of the reduction item is non-constant, load it from global
5564 // threadprivate variable.
5565 if (RCG.getSizes(N).second) {
5566 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5567 CGF, CGM.getContext().getSizeType(),
5568 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5569 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5570 CGM.getContext().getSizeType(), Loc);
5571 }
5572 RCG.emitAggregateType(CGF, N, Size);
5573 Address OrigAddr = Address::invalid();
5574 // If the initializer uses the initializer from the declare reduction
5575 // construct, emit a pointer to the address of the original reduction item
5576 // (required by the reduction initializer).
5577 if (RCG.usesReductionInitializer(N)) {
5578 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5579 OrigAddr = CGF.EmitLoadOfPointer(
5580 SharedAddr,
5581 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5582 }
5583 // Emit the initializer:
5584 // %0 = bitcast void* %arg to <type>*
5585 // store <type> <init>, <type>* %0
5586 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5587 [](CodeGenFunction &) { return false; });
5588 CGF.FinishFunction();
5589 return Fn;
5590}
5591
5592/// Emits reduction combiner function:
5593/// \code
5594/// void @.red_comb(void* %arg0, void* %arg1) {
5595/// %lhs = bitcast void* %arg0 to <type>*
5596/// %rhs = bitcast void* %arg1 to <type>*
5597/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5598/// store <type> %2, <type>* %lhs
5599/// ret void
5600/// }
5601/// \endcode
5602static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5603 SourceLocation Loc,
5604 ReductionCodeGen &RCG, unsigned N,
5605 const Expr *ReductionOp,
5606 const Expr *LHS, const Expr *RHS,
5607 const Expr *PrivateRef) {
5608 ASTContext &C = CGM.getContext();
5609 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5610 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5611 FunctionArgList Args;
5612 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5613 C.VoidPtrTy, ImplicitParamKind::Other);
5614 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5615 ImplicitParamKind::Other);
5616 Args.emplace_back(&ParamInOut);
5617 Args.emplace_back(&ParamIn);
5618 const auto &FnInfo =
5619 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5620 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5621 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5622 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5623 Name, &CGM.getModule());
5624 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5625 Fn->setDoesNotRecurse();
5626 CodeGenFunction CGF(CGM);
5627 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5628 llvm::Value *Size = nullptr;
5629 // If the size of the reduction item is non-constant, load it from global
5630 // threadprivate variable.
5631 if (RCG.getSizes(N).second) {
5632 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5633 CGF, CGM.getContext().getSizeType(),
5634 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5635 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5636 CGM.getContext().getSizeType(), Loc);
5637 }
5638 RCG.emitAggregateType(CGF, N, Size);
5639 // Remap lhs and rhs variables to the addresses of the function arguments.
5640 // %lhs = bitcast void* %arg0 to <type>*
5641 // %rhs = bitcast void* %arg1 to <type>*
5642 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5643 PrivateScope.addPrivate(
5644 LHSVD,
5645 // Pull out the pointer to the variable.
5646 CGF.EmitLoadOfPointer(
5647 CGF.GetAddrOfLocalVar(&ParamInOut)
5648 .withElementType(CGF.Builder.getPtrTy(0)),
5649 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5650 PrivateScope.addPrivate(
5651 RHSVD,
5652 // Pull out the pointer to the variable.
5653 CGF.EmitLoadOfPointer(
5654 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5655 CGF.Builder.getPtrTy(0)),
5656 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5657 PrivateScope.Privatize();
5658 // Emit the combiner body:
5659 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5660 // store <type> %2, <type>* %lhs
5661 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5662 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5663 cast<DeclRefExpr>(RHS));
5664 CGF.FinishFunction();
5665 return Fn;
5666}
5667
5668/// Emits reduction finalizer function:
5669/// \code
5670/// void @.red_fini(void* %arg) {
5671/// %0 = bitcast void* %arg to <type>*
5672/// <destroy>(<type>* %0)
5673/// ret void
5674/// }
5675/// \endcode
5676static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5677 SourceLocation Loc,
5678 ReductionCodeGen &RCG, unsigned N) {
5679 if (!RCG.needCleanups(N))
5680 return nullptr;
5681 ASTContext &C = CGM.getContext();
5682 FunctionArgList Args;
5683 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5684 ImplicitParamKind::Other);
5685 Args.emplace_back(&Param);
5686 const auto &FnInfo =
5687 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5688 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5689 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5690 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5691 Name, &CGM.getModule());
5692 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5693 Fn->setDoesNotRecurse();
5694 CodeGenFunction CGF(CGM);
5695 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5696 Address PrivateAddr = CGF.EmitLoadOfPointer(
5697 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5698 llvm::Value *Size = nullptr;
5699 // If the size of the reduction item is non-constant, load it from global
5700 // threadprivate variable.
5701 if (RCG.getSizes(N).second) {
5702 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5703 CGF, CGM.getContext().getSizeType(),
5704 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5705 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5706 CGM.getContext().getSizeType(), Loc);
5707 }
5708 RCG.emitAggregateType(CGF, N, Size);
5709 // Emit the finalizer body:
5710 // <destroy>(<type>* %0)
5711 RCG.emitCleanups(CGF, N, PrivateAddr);
5712 CGF.FinishFunction(Loc);
5713 return Fn;
5714}
5715
5716 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5717 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5718 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5719 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5720 return nullptr;
5721
5722 // Build typedef struct:
5723 // kmp_taskred_input {
5724 // void *reduce_shar; // shared reduction item
5725 // void *reduce_orig; // original reduction item used for initialization
5726 // size_t reduce_size; // size of data item
5727 // void *reduce_init; // data initialization routine
5728 // void *reduce_fini; // data finalization routine
5729 // void *reduce_comb; // data combiner routine
5730 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5731 // } kmp_taskred_input_t;
5732 ASTContext &C = CGM.getContext();
5733 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5734 RD->startDefinition();
5735 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5736 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5737 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5738 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5739 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5740 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5741 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5742 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5743 RD->completeDefinition();
5744 CanQualType RDType = C.getCanonicalTagType(RD);
5745 unsigned Size = Data.ReductionVars.size();
5746 llvm::APInt ArraySize(/*numBits=*/64, Size);
5747 QualType ArrayRDType =
5748 C.getConstantArrayType(RDType, ArraySize, nullptr,
5749 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5750 // kmp_task_red_input_t .rd_input.[Size];
5751 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5752 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5753 Data.ReductionCopies, Data.ReductionOps);
5754 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5755 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5756 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5757 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5758 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5759 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5760 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5761 ".rd_input.gep.");
5762 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5763 // ElemLVal.reduce_shar = &Shareds[Cnt];
5764 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5765 RCG.emitSharedOrigLValue(CGF, Cnt);
5766 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5767 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5768 // ElemLVal.reduce_orig = &Origs[Cnt];
5769 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5770 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5771 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5772 RCG.emitAggregateType(CGF, Cnt);
5773 llvm::Value *SizeValInChars;
5774 llvm::Value *SizeVal;
5775 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5776 // We use delayed creation/initialization for VLAs and array sections. It is
5777 // required because the runtime does not provide a way to pass the sizes of
5778 // VLAs/array sections to the initializer/combiner/finalizer functions.
5779 // Instead, threadprivate global variables are used to store these values
5780 // and make them available to those functions.
5781 bool DelayedCreation = !!SizeVal;
5782 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5783 /*isSigned=*/false);
5784 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5785 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5786 // ElemLVal.reduce_init = init;
5787 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5788 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5789 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5790 // ElemLVal.reduce_fini = fini;
5791 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5792 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5793 llvm::Value *FiniAddr =
5794 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5795 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5796 // ElemLVal.reduce_comb = comb;
5797 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5798 llvm::Value *CombAddr = emitReduceCombFunction(
5799 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5800 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5801 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5802 // ElemLVal.flags = 0;
5803 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5804 if (DelayedCreation) {
5805 CGF.EmitStoreOfScalar(
5806 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5807 FlagsLVal);
5808 } else
5809 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5810 }
5811 if (Data.IsReductionWithTaskMod) {
5812 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5813 // is_ws, int num, void *data);
5814 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5815 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5816 CGM.IntTy, /*isSigned=*/true);
5817 llvm::Value *Args[] = {
5818 IdentTLoc, GTid,
5819 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5820 /*isSigned=*/true),
5821 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5822 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5823 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5824 return CGF.EmitRuntimeCall(
5825 OMPBuilder.getOrCreateRuntimeFunction(
5826 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5827 Args);
5828 }
5829 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5830 llvm::Value *Args[] = {
5831 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5832 /*isSigned=*/true),
5833 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5834 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5835 CGM.VoidPtrTy)};
5836 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5837 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5838 Args);
5839}
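A sketch of the construct this initializer serves; each element of the .rd_input. array records the shared address plus the red_init/red_comb/red_fini callbacks built by the helpers above:

#include <omp.h>

int taskSum(const int *a, int n) {
  int sum = 0;
  #pragma omp parallel
  #pragma omp single
  // -> __kmpc_taskred_init (or __kmpc_taskred_modifier_init when a
  //    reduction modifier is present); tasks obtain their private copies
  //    through __kmpc_task_reduction_get_th_data.
  #pragma omp taskgroup task_reduction(+ : sum)
  for (int i = 0; i < n; ++i) {
    #pragma omp task in_reduction(+ : sum)
    sum += a[i];
  }
  return sum;
}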
5840
5841 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5842 SourceLocation Loc,
5843 bool IsWorksharingReduction) {
5844 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5845 // is_ws, int num, void *data);
5846 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5847 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5848 CGM.IntTy, /*isSigned=*/true);
5849 llvm::Value *Args[] = {IdentTLoc, GTid,
5850 llvm::ConstantInt::get(CGM.IntTy,
5851 IsWorksharingReduction ? 1 : 0,
5852 /*isSigned=*/true)};
5853 (void)CGF.EmitRuntimeCall(
5854 OMPBuilder.getOrCreateRuntimeFunction(
5855 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5856 Args);
5857}
5858
5859 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5860 SourceLocation Loc,
5861 ReductionCodeGen &RCG,
5862 unsigned N) {
5863 auto Sizes = RCG.getSizes(N);
5864 // Emit a threadprivate global variable if the size is non-constant
5865 // (Sizes.second != nullptr).
5866 if (Sizes.second) {
5867 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5868 /*isSigned=*/false);
5869 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5870 CGF, CGM.getContext().getSizeType(),
5871 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5872 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5873 }
5874}
5875
5876 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5877 SourceLocation Loc,
5878 llvm::Value *ReductionsPtr,
5879 LValue SharedLVal) {
5880 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5881 // *d);
5882 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5883 CGM.IntTy,
5884 /*isSigned=*/true),
5885 ReductionsPtr,
5886 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5887 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5888 return Address(
5889 CGF.EmitRuntimeCall(
5890 OMPBuilder.getOrCreateRuntimeFunction(
5891 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5892 Args),
5893 CGF.Int8Ty, SharedLVal.getAlignment());
5894}
5895
5896 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5897 const OMPTaskDataTy &Data) {
5898 if (!CGF.HaveInsertPoint())
5899 return;
5900
5901 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5902 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5903 OMPBuilder.createTaskwait(CGF.Builder);
5904 } else {
5905 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5906 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5907 auto &M = CGM.getModule();
5908 Address DependenciesArray = Address::invalid();
5909 llvm::Value *NumOfElements;
5910 std::tie(NumOfElements, DependenciesArray) =
5911 emitDependClause(CGF, Data.Dependences, Loc);
5912 if (!Data.Dependences.empty()) {
5913 llvm::Value *DepWaitTaskArgs[7];
5914 DepWaitTaskArgs[0] = UpLoc;
5915 DepWaitTaskArgs[1] = ThreadID;
5916 DepWaitTaskArgs[2] = NumOfElements;
5917 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5918 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5919 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5920 DepWaitTaskArgs[6] =
5921 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5922
5923 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5924
5925 // Build call void __kmpc_omp_taskwait_deps_51(ident_t *loc, kmp_int32
5926 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5927 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5928 // kmp_int32 has_no_wait), used when dependence info is specified.
5929 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5930 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5931 DepWaitTaskArgs);
5932
5933 } else {
5934
5935 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5936 // global_tid);
5937 llvm::Value *Args[] = {UpLoc, ThreadID};
5938 // Ignore return result until untied tasks are supported.
5939 CGF.EmitRuntimeCall(
5940 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5941 Args);
5942 }
5943 }
5944
5945 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5946 Region->emitUntiedSwitch(CGF);
5947}
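// For illustration, the two lowering paths correspond roughly to
// (hypothetical user code):
//
//   #pragma omp taskwait                 // -> __kmpc_omp_taskwait(loc, gtid)
//   #pragma omp taskwait depend(in : x)  // -> __kmpc_omp_taskwait_deps_51(...)
//
// with the dependence array built by emitDependClause() above and
// has_no_wait taken from a 'nowait' clause, if present.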
5948
5949void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5950 OpenMPDirectiveKind InnerKind,
5951 const RegionCodeGenTy &CodeGen,
5952 bool HasCancel) {
5953 if (!CGF.HaveInsertPoint())
5954 return;
5955 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5956 InnerKind != OMPD_critical &&
5957 InnerKind != OMPD_master &&
5958 InnerKind != OMPD_masked);
5959 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5960}
5961
5962namespace {
5963enum RTCancelKind {
5964 CancelNoreq = 0,
5965 CancelParallel = 1,
5966 CancelLoop = 2,
5967 CancelSections = 3,
5968 CancelTaskgroup = 4
5969};
5970} // anonymous namespace
5971
5972static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5973 RTCancelKind CancelKind = CancelNoreq;
5974 if (CancelRegion == OMPD_parallel)
5975 CancelKind = CancelParallel;
5976 else if (CancelRegion == OMPD_for)
5977 CancelKind = CancelLoop;
5978 else if (CancelRegion == OMPD_sections)
5979 CancelKind = CancelSections;
5980 else {
5981 assert(CancelRegion == OMPD_taskgroup);
5982 CancelKind = CancelTaskgroup;
5983 }
5984 return CancelKind;
5985}
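// For illustration, the directive-to-kind mapping implemented above:
//
//   #pragma omp cancel parallel   -> CancelParallel  (1)
//   #pragma omp cancel for        -> CancelLoop      (2)
//   #pragma omp cancel sections   -> CancelSections  (3)
//   #pragma omp cancel taskgroup  -> CancelTaskgroup (4)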
5986
5987void CGOpenMPRuntime::emitCancellationPointCall(
5988 CodeGenFunction &CGF, SourceLocation Loc,
5989 OpenMPDirectiveKind CancelRegion) {
5990 if (!CGF.HaveInsertPoint())
5991 return;
5992 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5993 // global_tid, kmp_int32 cncl_kind);
5994 if (auto *OMPRegionInfo =
5995 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5996 // For 'cancellation point taskgroup', the task region info may not have a
5997 // cancel. This may instead happen in another adjacent task.
5998 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5999 llvm::Value *Args[] = {
6000 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6001 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6002 // Ignore return result until untied tasks are supported.
6003 llvm::Value *Result = CGF.EmitRuntimeCall(
6004 OMPBuilder.getOrCreateRuntimeFunction(
6005 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6006 Args);
6007 // if (__kmpc_cancellationpoint()) {
6008 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6009 // exit from construct;
6010 // }
6011 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6012 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6013 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6014 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6015 CGF.EmitBlock(ExitBB);
6016 if (CancelRegion == OMPD_parallel)
6017 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6018 // exit from construct;
6019 CodeGenFunction::JumpDest CancelDest =
6020 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6021 CGF.EmitBranchThroughCleanup(CancelDest);
6022 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6023 }
6024 }
6025}
6026
6027void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6028 const Expr *IfCond,
6029 OpenMPDirectiveKind CancelRegion) {
6030 if (!CGF.HaveInsertPoint())
6031 return;
6032 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6033 // kmp_int32 cncl_kind);
6034 auto &M = CGM.getModule();
6035 if (auto *OMPRegionInfo =
6036 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6037 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6038 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6039 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6040 llvm::Value *Args[] = {
6041 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6042 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6043 // Ignore return result until untied tasks are supported.
6044 llvm::Value *Result = CGF.EmitRuntimeCall(
6045 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6046 // if (__kmpc_cancel()) {
6047 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6048 // exit from construct;
6049 // }
6050 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6051 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6052 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6053 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6054 CGF.EmitBlock(ExitBB);
6055 if (CancelRegion == OMPD_parallel)
6056 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6057 // exit from construct;
6058 CodeGenFunction::JumpDest CancelDest =
6059 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6060 CGF.EmitBranchThroughCleanup(CancelDest);
6061 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6062 };
6063 if (IfCond) {
6064 emitIfClause(CGF, IfCond, ThenGen,
6065 [](CodeGenFunction &, PrePostActionTy &) {});
6066 } else {
6067 RegionCodeGenTy ThenRCG(ThenGen);
6068 ThenRCG(CGF);
6069 }
6070 }
6071}
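// For illustration, a minimal sketch of a guarded cancel (condition name
// hypothetical):
//
//   #pragma omp parallel
//   {
//     #pragma omp cancel parallel if(err)
//   }
//
// When 'err' is not a compile-time constant, ThenGen above runs under the
// if-clause condition: __kmpc_cancel is called and, on a non-zero result,
// control branches through cleanups to the cancellation destination, with a
// cancel barrier emitted first for parallel regions.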
6072
6073namespace {
6074/// Cleanup action for uses_allocators support.
6075class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6076 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6077
6078public:
6079 OMPUsesAllocatorsActionTy(
6080 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6081 : Allocators(Allocators) {}
6082 void Enter(CodeGenFunction &CGF) override {
6083 if (!CGF.HaveInsertPoint())
6084 return;
6085 for (const auto &AllocatorData : Allocators) {
6086 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6087 CGF, AllocatorData.first, AllocatorData.second);
6088 }
6089 }
6090 void Exit(CodeGenFunction &CGF) override {
6091 if (!CGF.HaveInsertPoint())
6092 return;
6093 for (const auto &AllocatorData : Allocators) {
6094 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6095 AllocatorData.first);
6096 }
6097 }
6098};
6099} // namespace
6100
6101void CGOpenMPRuntime::emitTargetOutlinedFunction(
6102 const OMPExecutableDirective &D, StringRef ParentName,
6103 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6104 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6105 assert(!ParentName.empty() && "Invalid target entry parent name!");
6106 HasEmittedTargetRegion = true;
6107 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6108 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6109 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6110 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6111 if (!D.AllocatorTraits)
6112 continue;
6113 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6114 }
6115 }
6116 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6117 CodeGen.setAction(UsesAllocatorAction);
6118 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6119 IsOffloadEntry, CodeGen);
6120}
6121
6122void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6123 const Expr *Allocator,
6124 const Expr *AllocatorTraits) {
6125 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6126 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6127 // Use default memspace handle.
6128 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6129 llvm::Value *NumTraits = llvm::ConstantInt::get(
6130 CGF.IntTy, cast<ConstantArrayType>(
6131 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6132 ->getSize()
6133 .getLimitedValue());
6134 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6135 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6136 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6137 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6138 AllocatorTraitsLVal.getBaseInfo(),
6139 AllocatorTraitsLVal.getTBAAInfo());
6140 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6141
6142 llvm::Value *AllocatorVal =
6143 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6144 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6145 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6146 // Store to allocator.
6147 CGF.EmitVarDecl(*cast<VarDecl>(
6148 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6149 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6150 AllocatorVal =
6151 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6152 Allocator->getType(), Allocator->getExprLoc());
6153 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6154}
6155
6156void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6157 const Expr *Allocator) {
6158 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6159 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6160 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6161 llvm::Value *AllocatorVal =
6162 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6163 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6164 CGF.getContext().VoidPtrTy,
6165 Allocator->getExprLoc());
6166 (void)CGF.EmitRuntimeCall(
6167 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6168 OMPRTL___kmpc_destroy_allocator),
6169 {ThreadId, AllocatorVal});
6170}
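// For illustration, a minimal sketch of a uses_allocators clause that is
// served by the init/fini pair above (names hypothetical):
//
//   omp_alloctrait_t traits[1] = {{omp_atk_alignment, 64}};
//   omp_allocator_handle_t a;
//   #pragma omp target uses_allocators(a(traits))
//   { /* 'a' is usable here */ }
//
// On region entry: a = __kmpc_init_allocator(gtid, /*memspace=*/NULL, 1,
// traits); on region exit: __kmpc_destroy_allocator(gtid, a).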
6171
6172void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6173 const OMPExecutableDirective &D, CodeGenFunction &CGF,
6174 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6175 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6176 "invalid default attrs structure");
6177 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6178 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6179
6180 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6181 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6182 /*UpperBoundOnly=*/true);
6183
6184 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6185 for (auto *A : C->getAttrs()) {
6186 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6187 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6188 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6189 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6190 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6191 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6192 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6193 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6194 &AttrMaxThreadsVal);
6195 else
6196 continue;
6197
6198 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6199 if (AttrMaxThreadsVal > 0)
6200 MaxThreadsVal = MaxThreadsVal > 0
6201 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6202 : AttrMaxThreadsVal;
6203 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6204 if (AttrMaxBlocksVal > 0)
6205 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6206 : AttrMaxBlocksVal;
6207 }
6208 }
6209}
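// For illustration, a worked example of the merging above, assuming an AMDGPU
// target (clause and attribute values hypothetical):
//
//   #pragma omp target thread_limit(256) \
//       ompx_attribute([[clang::amdgpu_flat_work_group_size(1, 128)]])
//
// The directive contributes MaxThreadsVal = 256, the attribute contributes
// AttrMaxThreadsVal = 128, and since both are positive the result is
// min(256, 128) = 128. Min/max team counts are merged the same way.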
6210
6211void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6212 const OMPExecutableDirective &D, StringRef ParentName,
6213 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6214 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6215
6216 llvm::TargetRegionEntryInfo EntryInfo =
6217 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6218
6219 CodeGenFunction CGF(CGM, true);
6220 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6221 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6222 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6223
6224 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6225 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6226 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6227 };
6228
6229 cantFail(OMPBuilder.emitTargetRegionFunction(
6230 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6231 OutlinedFnID));
6232
6233 if (!OutlinedFn)
6234 return;
6235
6236 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6237
6238 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6239 for (auto *A : C->getAttrs()) {
6240 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6241 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6242 }
6243 }
6244}
6245
6246/// Checks if the expression is constant or does not have non-trivial function
6247/// calls.
6248static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6249 // We can skip constant expressions.
6250 // We can skip expressions with trivial calls or simple expressions.
6251 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6252 !E->hasNonTrivialCall(Ctx)) &&
6253 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6254}
6255
6256const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6257 const Stmt *Body) {
6258 const Stmt *Child = Body->IgnoreContainers();
6259 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6260 Child = nullptr;
6261 for (const Stmt *S : C->body()) {
6262 if (const auto *E = dyn_cast<Expr>(S)) {
6263 if (isTrivial(Ctx, E))
6264 continue;
6265 }
6266 // Some of the statements can be ignored.
6267 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6268 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6269 continue;
6270 // Analyze declarations.
6271 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6272 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6273 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6274 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6275 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6276 isa<UsingDirectiveDecl>(D) ||
6277 isa<OMPDeclareReductionDecl>(D) ||
6278 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6279 return true;
6280 const auto *VD = dyn_cast<VarDecl>(D);
6281 if (!VD)
6282 return false;
6283 return VD->hasGlobalStorage() || !VD->isUsed();
6284 }))
6285 continue;
6286 }
6287 // Found multiple children - cannot get the one child only.
6288 if (Child)
6289 return nullptr;
6290 Child = S;
6291 }
6292 if (Child)
6293 Child = Child->IgnoreContainers();
6294 }
6295 return Child;
6296}
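// For illustration, in the sketch below the teams directive is the single
// compound child returned: the unused local declaration and the null
// statement are filtered out by the loop above.
//
//   #pragma omp target
//   {
//     int unused;
//     ;
//     #pragma omp teams
//     { }
//   }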
6297
6298const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6299 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6300 int32_t &MaxTeamsVal) {
6301
6302 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6303 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6304 "Expected target-based executable directive.");
6305 switch (DirectiveKind) {
6306 case OMPD_target: {
6307 const auto *CS = D.getInnermostCapturedStmt();
6308 const auto *Body =
6309 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6310 const Stmt *ChildStmt =
6311 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6312 if (const auto *NestedDir =
6313 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6314 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6315 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6316 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6317 ->getNumTeams()
6318 .front();
6319 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6320 if (auto Constant =
6321 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6322 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6323 return NumTeams;
6324 }
6325 MinTeamsVal = MaxTeamsVal = 0;
6326 return nullptr;
6327 }
6328 MinTeamsVal = MaxTeamsVal = 1;
6329 return nullptr;
6330 }
6331 // A value of -1 indicates that no teams region needs to be emitted.
6332 MinTeamsVal = MaxTeamsVal = -1;
6333 return nullptr;
6334 }
6335 case OMPD_target_teams_loop:
6336 case OMPD_target_teams:
6337 case OMPD_target_teams_distribute:
6338 case OMPD_target_teams_distribute_simd:
6339 case OMPD_target_teams_distribute_parallel_for:
6340 case OMPD_target_teams_distribute_parallel_for_simd: {
6341 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6342 const Expr *NumTeams =
6343 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6344 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6345 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6346 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6347 return NumTeams;
6348 }
6349 MinTeamsVal = MaxTeamsVal = 0;
6350 return nullptr;
6351 }
6352 case OMPD_target_parallel:
6353 case OMPD_target_parallel_for:
6354 case OMPD_target_parallel_for_simd:
6355 case OMPD_target_parallel_loop:
6356 case OMPD_target_simd:
6357 MinTeamsVal = MaxTeamsVal = 1;
6358 return nullptr;
6359 case OMPD_parallel:
6360 case OMPD_for:
6361 case OMPD_parallel_for:
6362 case OMPD_parallel_loop:
6363 case OMPD_parallel_master:
6364 case OMPD_parallel_sections:
6365 case OMPD_for_simd:
6366 case OMPD_parallel_for_simd:
6367 case OMPD_cancel:
6368 case OMPD_cancellation_point:
6369 case OMPD_ordered:
6370 case OMPD_threadprivate:
6371 case OMPD_allocate:
6372 case OMPD_task:
6373 case OMPD_simd:
6374 case OMPD_tile:
6375 case OMPD_unroll:
6376 case OMPD_sections:
6377 case OMPD_section:
6378 case OMPD_single:
6379 case OMPD_master:
6380 case OMPD_critical:
6381 case OMPD_taskyield:
6382 case OMPD_barrier:
6383 case OMPD_taskwait:
6384 case OMPD_taskgroup:
6385 case OMPD_atomic:
6386 case OMPD_flush:
6387 case OMPD_depobj:
6388 case OMPD_scan:
6389 case OMPD_teams:
6390 case OMPD_target_data:
6391 case OMPD_target_exit_data:
6392 case OMPD_target_enter_data:
6393 case OMPD_distribute:
6394 case OMPD_distribute_simd:
6395 case OMPD_distribute_parallel_for:
6396 case OMPD_distribute_parallel_for_simd:
6397 case OMPD_teams_distribute:
6398 case OMPD_teams_distribute_simd:
6399 case OMPD_teams_distribute_parallel_for:
6400 case OMPD_teams_distribute_parallel_for_simd:
6401 case OMPD_target_update:
6402 case OMPD_declare_simd:
6403 case OMPD_declare_variant:
6404 case OMPD_begin_declare_variant:
6405 case OMPD_end_declare_variant:
6406 case OMPD_declare_target:
6407 case OMPD_end_declare_target:
6408 case OMPD_declare_reduction:
6409 case OMPD_declare_mapper:
6410 case OMPD_taskloop:
6411 case OMPD_taskloop_simd:
6412 case OMPD_master_taskloop:
6413 case OMPD_master_taskloop_simd:
6414 case OMPD_parallel_master_taskloop:
6415 case OMPD_parallel_master_taskloop_simd:
6416 case OMPD_requires:
6417 case OMPD_metadirective:
6418 case OMPD_unknown:
6419 break;
6420 default:
6421 break;
6422 }
6423 llvm_unreachable("Unexpected directive kind.");
6424}
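// For illustration, a sketch of the bounds produced for a few directives:
//
//   #pragma omp target teams num_teams(4) -> MinTeamsVal = MaxTeamsVal = 4,
//                                            and the clause expr is returned
//   #pragma omp target teams              -> MinTeamsVal = MaxTeamsVal = 0
//                                            (runtime chooses)
//   #pragma omp target parallel           -> MinTeamsVal = MaxTeamsVal = 1
//   #pragma omp target (opaque body)      -> MinTeamsVal = MaxTeamsVal = -1
//                                            (no teams region known)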
6425
6426llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6427 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6428 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6429 "Clauses associated with the teams directive expected to be emitted "
6430 "only for the host!");
6431 CGBuilderTy &Bld = CGF.Builder;
6432 int32_t MinNT = -1, MaxNT = -1;
6433 const Expr *NumTeams =
6434 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6435 if (NumTeams != nullptr) {
6436 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6437
6438 switch (DirectiveKind) {
6439 case OMPD_target: {
6440 const auto *CS = D.getInnermostCapturedStmt();
6441 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6442 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6443 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6444 /*IgnoreResultAssign*/ true);
6445 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6446 /*isSigned=*/true);
6447 }
6448 case OMPD_target_teams:
6449 case OMPD_target_teams_distribute:
6450 case OMPD_target_teams_distribute_simd:
6451 case OMPD_target_teams_distribute_parallel_for:
6452 case OMPD_target_teams_distribute_parallel_for_simd: {
6453 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6454 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6455 /*IgnoreResultAssign*/ true);
6456 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6457 /*isSigned=*/true);
6458 }
6459 default:
6460 break;
6461 }
6462 }
6463
6464 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6465 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6466}
6467
6468/// Check for a num threads constant value (stored in \p UpperBound), or an
6469/// expression (stored in \p E). If the value is conditional (via an if-clause),
6470/// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
6471/// corresponding expression evaluation is not performed.
6472static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6473 const Expr **E, int32_t &UpperBound,
6474 bool UpperBoundOnly, llvm::Value **CondVal) {
6475 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6476 CGF.getContext(), CS->getCapturedStmt());
6477 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6478 if (!Dir)
6479 return;
6480
6481 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6482 // Handle the if clause. If an if clause is present, the number of threads
6483 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6484 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6485 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6486 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6487 const OMPIfClause *IfClause = nullptr;
6488 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6489 if (C->getNameModifier() == OMPD_unknown ||
6490 C->getNameModifier() == OMPD_parallel) {
6491 IfClause = C;
6492 break;
6493 }
6494 }
6495 if (IfClause) {
6496 const Expr *CondExpr = IfClause->getCondition();
6497 bool Result;
6498 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6499 if (!Result) {
6500 UpperBound = 1;
6501 return;
6502 }
6503 } else {
6504 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6505 if (const auto *PreInit =
6506 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6507 for (const auto *I : PreInit->decls()) {
6508 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6509 CGF.EmitVarDecl(cast<VarDecl>(*I));
6510 } else {
6511 CodeGenFunction::AutoVarEmission Emission =
6512 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6513 CGF.EmitAutoVarCleanups(Emission);
6514 }
6515 }
6516 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6517 }
6518 }
6519 }
6520 }
6521 // Check the value of the num_threads clause iff the if clause was not
6522 // specified or does not evaluate to false.
6523 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6524 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6525 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6526 const auto *NumThreadsClause =
6527 Dir->getSingleClause<OMPNumThreadsClause>();
6528 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6529 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6530 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6531 UpperBound =
6532 UpperBound
6533 ? Constant->getZExtValue()
6534 : std::min(UpperBound,
6535 static_cast<int32_t>(Constant->getZExtValue()));
6536 // If we haven't found an upper bound, remember we saw a thread-limiting
6537 // clause.
6538 if (UpperBound == -1)
6539 UpperBound = 0;
6540 if (!E)
6541 return;
6542 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6543 if (const auto *PreInit =
6544 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6545 for (const auto *I : PreInit->decls()) {
6546 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6547 CGF.EmitVarDecl(cast<VarDecl>(*I));
6548 } else {
6549 CodeGenFunction::AutoVarEmission Emission =
6550 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6551 CGF.EmitAutoVarCleanups(Emission);
6552 }
6553 }
6554 }
6555 *E = NTExpr;
6556 }
6557 return;
6558 }
6559 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6560 UpperBound = 1;
6561}
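// For illustration, the analysis above applied to a sketch like the following
// (names hypothetical):
//
//   #pragma omp target
//   #pragma omp parallel if(c) num_threads(n)
//   { ... }
//
// produces an effective thread count of c ? (n ? n : 0) : 1: a constant-false
// 'c' clamps the upper bound to 1 without evaluating 'n', and a nested
// simd-only region is likewise clamped to 1.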
6562
6563const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6564 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6565 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6566 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6567 "Clauses associated with the teams directive expected to be emitted "
6568 "only for the host!");
6569 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6570 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6571 "Expected target-based executable directive.");
6572
6573 const Expr *NT = nullptr;
6574 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6575
6576 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6577 if (E->isIntegerConstantExpr(CGF.getContext())) {
6578 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6579 UpperBound = UpperBound ? Constant->getZExtValue()
6580 : std::min(UpperBound,
6581 int32_t(Constant->getZExtValue()));
6582 }
6583 // If we haven't found an upper bound, remember we saw a thread-limiting
6584 // clause.
6585 if (UpperBound == -1)
6586 UpperBound = 0;
6587 if (EPtr)
6588 *EPtr = E;
6589 };
6590
6591 auto ReturnSequential = [&]() {
6592 UpperBound = 1;
6593 return NT;
6594 };
6595
6596 switch (DirectiveKind) {
6597 case OMPD_target: {
6598 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6599 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6600 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6601 CGF.getContext(), CS->getCapturedStmt());
6602 // TODO: The standard is not clear on how to resolve two thread limit
6603 // clauses; pick the teams one if it's present, otherwise the target one.
6604 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6605 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6606 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6607 ThreadLimitClause = TLC;
6608 if (ThreadLimitExpr) {
6609 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6610 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6611 CodeGenFunction::LexicalScope Scope(
6612 CGF,
6613 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6614 if (const auto *PreInit =
6615 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6616 for (const auto *I : PreInit->decls()) {
6617 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6618 CGF.EmitVarDecl(cast<VarDecl>(*I));
6619 } else {
6620 CodeGenFunction::AutoVarEmission Emission =
6621 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6622 CGF.EmitAutoVarCleanups(Emission);
6623 }
6624 }
6625 }
6626 }
6627 }
6628 }
6629 if (ThreadLimitClause)
6630 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6631 ThreadLimitExpr);
6632 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6633 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6634 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6635 CS = Dir->getInnermostCapturedStmt();
6636 Child = CGOpenMPRuntime::getSingleCompoundChild(
6637 CGF.getContext(), CS->getCapturedStmt());
6638 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6639 }
6640 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6641 CS = Dir->getInnermostCapturedStmt();
6642 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6643 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6644 return ReturnSequential();
6645 }
6646 return NT;
6647 }
6648 case OMPD_target_teams: {
6649 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6650 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6651 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6652 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6653 ThreadLimitExpr);
6654 }
6655 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6656 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6657 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6658 CGF.getContext(), CS->getCapturedStmt());
6659 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6660 if (Dir->getDirectiveKind() == OMPD_distribute) {
6661 CS = Dir->getInnermostCapturedStmt();
6662 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6663 }
6664 }
6665 return NT;
6666 }
6667 case OMPD_target_teams_distribute:
6668 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6669 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6670 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6671 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6672 ThreadLimitExpr);
6673 }
6674 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6675 UpperBoundOnly, CondVal);
6676 return NT;
6677 case OMPD_target_teams_loop:
6678 case OMPD_target_parallel_loop:
6679 case OMPD_target_parallel:
6680 case OMPD_target_parallel_for:
6681 case OMPD_target_parallel_for_simd:
6682 case OMPD_target_teams_distribute_parallel_for:
6683 case OMPD_target_teams_distribute_parallel_for_simd: {
6684 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6685 const OMPIfClause *IfClause = nullptr;
6686 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6687 if (C->getNameModifier() == OMPD_unknown ||
6688 C->getNameModifier() == OMPD_parallel) {
6689 IfClause = C;
6690 break;
6691 }
6692 }
6693 if (IfClause) {
6694 const Expr *Cond = IfClause->getCondition();
6695 bool Result;
6696 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6697 if (!Result)
6698 return ReturnSequential();
6699 } else {
6700 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6701 *CondVal = CGF.EvaluateExprAsBool(Cond);
6702 }
6703 }
6704 }
6705 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6706 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6707 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6708 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6709 ThreadLimitExpr);
6710 }
6711 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6712 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6713 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6714 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6715 return NumThreadsClause->getNumThreads();
6716 }
6717 return NT;
6718 }
6719 case OMPD_target_teams_distribute_simd:
6720 case OMPD_target_simd:
6721 return ReturnSequential();
6722 default:
6723 break;
6724 }
6725 llvm_unreachable("Unsupported directive kind.");
6726}
6727
6728llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6729 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6730 llvm::Value *NumThreadsVal = nullptr;
6731 llvm::Value *CondVal = nullptr;
6732 llvm::Value *ThreadLimitVal = nullptr;
6733 const Expr *ThreadLimitExpr = nullptr;
6734 int32_t UpperBound = -1;
6735
6736 const Expr *NT = getNumThreadsExprForTargetDirective(
6737 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6738 &ThreadLimitExpr);
6739
6740 // Thread limit expressions are used below; emit them.
6741 if (ThreadLimitExpr) {
6742 ThreadLimitVal =
6743 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6744 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6745 /*isSigned=*/false);
6746 }
6747
6748 // Generate the num threads expression.
6749 if (UpperBound == 1) {
6750 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6751 } else if (NT) {
6752 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6753 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6754 /*isSigned=*/false);
6755 } else if (ThreadLimitVal) {
6756 // If we do not have a num threads value but a thread limit, replace the
6757 // former with the latter. We have already handled the thread limit expression.
6758 NumThreadsVal = ThreadLimitVal;
6759 ThreadLimitVal = nullptr;
6760 } else {
6761 // Default to "0" which means runtime choice.
6762 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6763 NumThreadsVal = CGF.Builder.getInt32(0);
6764 }
6765
6766 // Handle the if clause. If an if clause is present, the number of threads
6767 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6768 if (CondVal) {
6769 CodeGenFunction::RunCleanupsScope Scope(CGF);
6770 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6771 CGF.Builder.getInt32(1));
6772 }
6773
6774 // If both the thread limit and the num threads expression were present,
6775 // take the minimum.
6776 if (ThreadLimitVal) {
6777 NumThreadsVal = CGF.Builder.CreateSelect(
6778 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6779 ThreadLimitVal, NumThreadsVal);
6780 }
6781
6782 return NumThreadsVal;
6783}
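// For illustration, the selects emitted above have roughly this shape in IR
// (value names hypothetical):
//
//   %nt0 = ... num_threads expression, or i32 0 for "runtime default" ...
//   %nt1 = select i1 %cond, i32 %nt0, i32 1    ; if-clause guard
//   %lt  = icmp ult i32 %tl, %nt1
//   %res = select i1 %lt, i32 %tl, i32 %nt1    ; min(thread_limit, num_threads)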
6784
6785namespace {
6787
6788// Utility to handle information from clauses associated with a given
6789// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6790// It provides a convenient interface to obtain the information and generate
6791// code for that information.
6792class MappableExprsHandler {
6793public:
6794 /// Custom comparator for attach-pointer expressions that compares them by
6795 /// complexity (i.e. their component-depth) first, then by the order in which
6796 /// they were computed by collectAttachPtrExprInfo(), if they are semantically
6797 /// different.
6798 struct AttachPtrExprComparator {
6799 const MappableExprsHandler &Handler;
6800 // Cache of previous equality comparison results.
6801 mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
6802 CachedEqualityComparisons;
6803
6804 AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
6805 AttachPtrExprComparator() = delete;
6806
6807 // Return true iff LHS is "less than" RHS.
6808 bool operator()(const Expr *LHS, const Expr *RHS) const {
6809 if (LHS == RHS)
6810 return false;
6811
6812 // First, compare by complexity (depth)
6813 const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
6814 const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
6815
6816 std::optional<size_t> DepthLHS =
6817 (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
6818 : std::nullopt;
6819 std::optional<size_t> DepthRHS =
6820 (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
6821 : std::nullopt;
6822
6823 // std::nullopt (no attach pointer) has lowest complexity
6824 if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
6825 // Both have same complexity, now check semantic equality
6826 if (areEqual(LHS, RHS))
6827 return false;
6828 // Different semantically, compare by computation order
6829 return wasComputedBefore(LHS, RHS);
6830 }
6831 if (!DepthLHS.has_value())
6832 return true; // LHS has lower complexity
6833 if (!DepthRHS.has_value())
6834 return false; // RHS has lower complexity
6835
6836 // Both have values, compare by depth (lower depth = lower complexity)
6837 if (DepthLHS.value() != DepthRHS.value())
6838 return DepthLHS.value() < DepthRHS.value();
6839
6840 // Same complexity, now check semantic equality
6841 if (areEqual(LHS, RHS))
6842 return false;
6843 // Different semantically, compare by computation order
6844 return wasComputedBefore(LHS, RHS);
6845 }
6846
6847 public:
6848 /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
6849 /// results, if available, otherwise does a recursive semantic comparison.
6850 bool areEqual(const Expr *LHS, const Expr *RHS) const {
6851 // Check cache first for faster lookup
6852 const auto CachedResultIt = CachedEqualityComparisons.find({LHS, RHS});
6853 if (CachedResultIt != CachedEqualityComparisons.end())
6854 return CachedResultIt->second;
6855
6856 bool ComparisonResult = areSemanticallyEqual(LHS, RHS);
6857
6858 // Cache the result for future lookups (both orders since semantic
6859 // equality is commutative)
6860 CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
6861 CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
6862 return ComparisonResult;
6863 }
6864
6865 /// Compare the two attach-ptr expressions by their computation order.
6866 /// Returns true iff LHS was computed before RHS by
6867 /// collectAttachPtrExprInfo().
6868 bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
6869 const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
6870 const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
6871
6872 return OrderLHS < OrderRHS;
6873 }
6874
6875 private:
6876 /// Helper function to compare attach-pointer expressions semantically.
6877 /// This function handles various expression types that can be part of an
6878 /// attach-pointer.
6879 /// TODO: Not urgent, but we should ideally return true when comparing
6880 /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
6881 bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
6882 if (LHS == RHS)
6883 return true;
6884
6885 // If only one is null, they aren't equal
6886 if (!LHS || !RHS)
6887 return false;
6888
6889 ASTContext &Ctx = Handler.CGF.getContext();
6890 // Strip away parentheses and no-op casts to get to the core expression
6891 LHS = LHS->IgnoreParenNoopCasts(Ctx);
6892 RHS = RHS->IgnoreParenNoopCasts(Ctx);
6893
6894 // Direct pointer comparison of the underlying expressions
6895 if (LHS == RHS)
6896 return true;
6897
6898 // Check if the expression classes match
6899 if (LHS->getStmtClass() != RHS->getStmtClass())
6900 return false;
6901
6902 // Handle DeclRefExpr (variable references)
6903 if (const auto *LD = dyn_cast<DeclRefExpr>(LHS)) {
6904 const auto *RD = dyn_cast<DeclRefExpr>(RHS);
6905 if (!RD)
6906 return false;
6907 return LD->getDecl()->getCanonicalDecl() ==
6908 RD->getDecl()->getCanonicalDecl();
6909 }
6910
6911 // Handle ArraySubscriptExpr (array indexing like a[i])
6912 if (const auto *LA = dyn_cast<ArraySubscriptExpr>(LHS)) {
6913 const auto *RA = dyn_cast<ArraySubscriptExpr>(RHS);
6914 if (!RA)
6915 return false;
6916 return areSemanticallyEqual(LA->getBase(), RA->getBase()) &&
6917 areSemanticallyEqual(LA->getIdx(), RA->getIdx());
6918 }
6919
6920 // Handle MemberExpr (member access like s.m or p->m)
6921 if (const auto *LM = dyn_cast<MemberExpr>(LHS)) {
6922 const auto *RM = dyn_cast<MemberExpr>(RHS);
6923 if (!RM)
6924 return false;
6925 if (LM->getMemberDecl()->getCanonicalDecl() !=
6926 RM->getMemberDecl()->getCanonicalDecl())
6927 return false;
6928 return areSemanticallyEqual(LM->getBase(), RM->getBase());
6929 }
6930
6931 // Handle UnaryOperator (unary operations like *p, &x, etc.)
6932 if (const auto *LU = dyn_cast<UnaryOperator>(LHS)) {
6933 const auto *RU = dyn_cast<UnaryOperator>(RHS);
6934 if (!RU)
6935 return false;
6936 if (LU->getOpcode() != RU->getOpcode())
6937 return false;
6938 return areSemanticallyEqual(LU->getSubExpr(), RU->getSubExpr());
6939 }
6940
6941 // Handle BinaryOperator (binary operations like p + offset)
6942 if (const auto *LB = dyn_cast<BinaryOperator>(LHS)) {
6943 const auto *RB = dyn_cast<BinaryOperator>(RHS);
6944 if (!RB)
6945 return false;
6946 if (LB->getOpcode() != RB->getOpcode())
6947 return false;
6948 return areSemanticallyEqual(LB->getLHS(), RB->getLHS()) &&
6949 areSemanticallyEqual(LB->getRHS(), RB->getRHS());
6950 }
6951
6952 // Handle ArraySectionExpr (array sections like a[0:1])
6953 // Attach pointers should not contain array-sections, but currently we
6954 // don't emit an error.
6955 if (const auto *LAS = dyn_cast<ArraySectionExpr>(LHS)) {
6956 const auto *RAS = dyn_cast<ArraySectionExpr>(RHS);
6957 if (!RAS)
6958 return false;
6959 return areSemanticallyEqual(LAS->getBase(), RAS->getBase()) &&
6960 areSemanticallyEqual(LAS->getLowerBound(),
6961 RAS->getLowerBound()) &&
6962 areSemanticallyEqual(LAS->getLength(), RAS->getLength());
6963 }
6964
6965 // Handle CastExpr (explicit casts)
6966 if (const auto *LC = dyn_cast<CastExpr>(LHS)) {
6967 const auto *RC = dyn_cast<CastExpr>(RHS);
6968 if (!RC)
6969 return false;
6970 if (LC->getCastKind() != RC->getCastKind())
6971 return false;
6972 return areSemanticallyEqual(LC->getSubExpr(), RC->getSubExpr());
6973 }
6974
6975 // Handle CXXThisExpr (this pointer)
6976 if (isa<CXXThisExpr>(LHS) && isa<CXXThisExpr>(RHS))
6977 return true;
6978
6979 // Handle IntegerLiteral (integer constants)
6980 if (const auto *LI = dyn_cast<IntegerLiteral>(LHS)) {
6981 const auto *RI = dyn_cast<IntegerLiteral>(RHS);
6982 if (!RI)
6983 return false;
6984 return LI->getValue() == RI->getValue();
6985 }
6986
6987 // Handle CharacterLiteral (character constants)
6988 if (const auto *LC = dyn_cast<CharacterLiteral>(LHS)) {
6989 const auto *RC = dyn_cast<CharacterLiteral>(RHS);
6990 if (!RC)
6991 return false;
6992 return LC->getValue() == RC->getValue();
6993 }
6994
6995 // Handle FloatingLiteral (floating point constants)
6996 if (const auto *LF = dyn_cast<FloatingLiteral>(LHS)) {
6997 const auto *RF = dyn_cast<FloatingLiteral>(RHS);
6998 if (!RF)
6999 return false;
7000 // Use bitwise comparison for floating point literals
7001 return LF->getValue().bitwiseIsEqual(RF->getValue());
7002 }
7003
7004 // Handle StringLiteral (string constants)
7005 if (const auto *LS = dyn_cast<StringLiteral>(LHS)) {
7006 const auto *RS = dyn_cast<StringLiteral>(RHS);
7007 if (!RS)
7008 return false;
7009 return LS->getString() == RS->getString();
7010 }
7011
7012 // Handle CXXNullPtrLiteralExpr (nullptr)
7013 if (isa<CXXNullPtrLiteralExpr>(LHS) && isa<CXXNullPtrLiteralExpr>(RHS))
7014 return true;
7015
7016 // Handle CXXBoolLiteralExpr (true/false)
7017 if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(LHS)) {
7018 const auto *RB = dyn_cast<CXXBoolLiteralExpr>(RHS);
7019 if (!RB)
7020 return false;
7021 return LB->getValue() == RB->getValue();
7022 }
7023
7024 // Fallback for other forms - use the existing comparison method
7025 return Expr::isSameComparisonOperand(LHS, RHS);
7026 }
7027 };
7028
7029 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7030 static unsigned getFlagMemberOffset() {
7031 unsigned Offset = 0;
7032 for (uint64_t Remain =
7033 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7034 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7035 !(Remain & 1); Remain = Remain >> 1)
7036 Offset++;
7037 return Offset;
7038 }
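  // For illustration: OMP_MAP_MEMBER_OF occupies the 16 topmost bits
  // (0xFFFF000000000000), so its lowest set bit is bit 48 and the loop above
  // returns 48; a member position P is then encoded elsewhere in this file as
  // (P + 1) << 48 in the map flags.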
7039
7040 /// Class that holds debugging information for a data mapping to be passed to
7041 /// the runtime library.
7042 class MappingExprInfo {
7043 /// The variable declaration used for the data mapping.
7044 const ValueDecl *MapDecl = nullptr;
7045 /// The original expression used in the map clause, or null if there is
7046 /// none.
7047 const Expr *MapExpr = nullptr;
7048
7049 public:
7050 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7051 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7052
7053 const ValueDecl *getMapDecl() const { return MapDecl; }
7054 const Expr *getMapExpr() const { return MapExpr; }
7055 };
7056
7057 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7058 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7059 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7060 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7061 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7062 using MapNonContiguousArrayTy =
7063 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7064 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7065 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7066 using MapData =
7067 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7068 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7069 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7070 using MapDataArrayTy = SmallVector<MapData, 4>;
7071
7072 /// This structure contains combined information generated for mappable
7073 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7074 /// mappers, and non-contiguous information.
7075 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7076 MapExprsArrayTy Exprs;
7077 MapValueDeclsArrayTy Mappers;
7078 MapValueDeclsArrayTy DevicePtrDecls;
7079
7080 /// Append arrays in \a CurInfo.
7081 void append(MapCombinedInfoTy &CurInfo) {
7082 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7083 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
7084 CurInfo.DevicePtrDecls.end());
7085 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7086 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7087 }
7088 };
7089
7090 /// Map between a struct and its lowest & highest elements which have been
7091 /// mapped.
7092 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7093 /// HE(FieldIndex, Pointer)}
7094 struct StructRangeInfoTy {
7095 MapCombinedInfoTy PreliminaryMapData;
7096 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7097 0, Address::invalid()};
7098 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7099 0, Address::invalid()};
7100 Address Base = Address::invalid();
7101 Address LB = Address::invalid();
7102 bool IsArraySection = false;
7103 bool HasCompleteRecord = false;
7104 };
7105
7106 /// A struct to store the attach pointer and pointee information, to be used
7107 /// when emitting an attach entry.
7108 struct AttachInfoTy {
7109 Address AttachPtrAddr = Address::invalid();
7110 Address AttachPteeAddr = Address::invalid();
7111 const ValueDecl *AttachPtrDecl = nullptr;
7112 const Expr *AttachMapExpr = nullptr;
7113
7114 bool isValid() const {
7115 return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
7116 }
7117 };
7118
7119 /// Check if there's any component list where the attach pointer expression
7120 /// matches the given captured variable.
7121 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7122 for (const auto &AttachEntry : AttachPtrExprMap) {
7123 if (AttachEntry.second) {
7124 // Check if the attach pointer expression is a DeclRefExpr that
7125 // references the captured variable
7126 if (const auto *DRE = dyn_cast<DeclRefExpr>(AttachEntry.second))
7127 if (DRE->getDecl() == VD)
7128 return true;
7129 }
7130 }
7131 return false;
7132 }
7133
7134 /// Get the previously-cached attach pointer for a component list, if any.
7135 const Expr *getAttachPtrExpr(
7136 OMPClauseMappableExprCommon::MappableExprComponentListRef Components)
7137 const {
7138 const auto It = AttachPtrExprMap.find(Components);
7139 if (It != AttachPtrExprMap.end())
7140 return It->second;
7141
7142 return nullptr;
7143 }
7144
7145private:
7146 /// Information collected for a component list of a map/motion clause.
7147 struct MapInfo {
7148 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7149 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7150 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7151 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7152 bool ReturnDevicePointer = false;
7153 bool IsImplicit = false;
7154 const ValueDecl *Mapper = nullptr;
7155 const Expr *VarRef = nullptr;
7156 bool ForDeviceAddr = false;
7157
7158 MapInfo() = default;
7159 MapInfo(
7160 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7161 OpenMPMapClauseKind MapType,
7162 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7163 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7164 bool ReturnDevicePointer, bool IsImplicit,
7165 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7166 bool ForDeviceAddr = false)
7167 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7168 MotionModifiers(MotionModifiers),
7169 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7170 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7171 };
7172
7173 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7174 /// member and there is no map information about it, then emission of that
7175 /// entry is deferred until the whole struct has been processed.
7176 struct DeferredDevicePtrEntryTy {
7177 const Expr *IE = nullptr;
7178 const ValueDecl *VD = nullptr;
7179 bool ForDeviceAddr = false;
7180
7181 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7182 bool ForDeviceAddr)
7183 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7184 };
7185
7186 /// The target directive from where the mappable clauses were extracted. It
7187 /// is either an executable directive or a user-defined mapper directive.
7188 llvm::PointerUnion<const OMPExecutableDirective *,
7189 const OMPDeclareMapperDecl *>
7190 CurDir;
7191
7192 /// Function the directive is being generated for.
7193 CodeGenFunction &CGF;
7194
7195 /// Set of all first private variables in the current directive.
7196 /// bool data is set to true if the variable is implicitly marked as
7197 /// firstprivate, false otherwise.
7198 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7199
7200 /// Map between device pointer declarations and their expression components.
7201 /// The key value for declarations in 'this' is null.
7202 llvm::DenseMap<
7203 const ValueDecl *,
7204 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7205 DevPointersMap;
7206
7207 /// Map between device addr declarations and their expression components.
7208 /// The key value for declarations in 'this' is null.
7209 llvm::DenseMap<
7210 const ValueDecl *,
7211 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7212 HasDevAddrsMap;
7213
7214 /// Map between lambda declarations and their map type.
7215 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7216
7217 /// Map from component lists to their attach pointer expressions.
7218 llvm::DenseMap<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7219 const Expr *>
7220 AttachPtrExprMap;
7221
7222 /// Map from attach pointer expressions to their component depth. The
7223 /// component-depth of the `nullptr` key (i.e. no attach-ptr) is
7224 /// `std::nullopt`; this can be used to order attach-ptr expressions by
7225 /// increasing/decreasing depth.
7226 /// TODO: Not urgent, but we should ideally use the number of pointer
7227 /// dereferences in an expr as an indicator of its complexity, instead of the
7228 /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
7229 /// `*(p + 5 + 5)` together.
7230 llvm::DenseMap<const Expr *, std::optional<size_t>>
7231 AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};
7232
7233 /// Map from attach pointer expressions to the order in which they were
7234 /// computed by collectAttachPtrExprInfo().
7235 llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
7236 {nullptr, 0}};
7237
7238 /// An instance of attach-ptr-expr comparator that can be used throughout the
7239 /// lifetime of this handler.
7240 AttachPtrExprComparator AttachPtrComparator;
7241
7242 llvm::Value *getExprTypeSize(const Expr *E) const {
7243 QualType ExprTy = E->getType().getCanonicalType();
7244
7245 // Calculate the size for array shaping expression.
7246 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7247 llvm::Value *Size =
7248 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7249 for (const Expr *SE : OAE->getDimensions()) {
7250 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7251 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7252 CGF.getContext().getSizeType(),
7253 SE->getExprLoc());
7254 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7255 }
7256 return Size;
7257 }
7258
7259 // Reference types are ignored for mapping purposes.
7260 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7261 ExprTy = RefTy->getPointeeType().getCanonicalType();
7262
7263 // Given that an array section is considered a built-in type, we need to
7264 // do the calculation based on the length of the section instead of relying
7265 // on CGF.getTypeSize(E->getType()).
7266 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
7267 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
7268 OAE->getBase()->IgnoreParenImpCasts())
7269 .getCanonicalType();
7270
7271 // If there is no length associated with the expression and the lower
7272 // bound is not specified either, that means we are using the whole length
7273 // of the base.
7274 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7275 !OAE->getLowerBound())
7276 return CGF.getTypeSize(BaseTy);
7277
7278 llvm::Value *ElemSize;
7279 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7280 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7281 } else {
7282 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7283 assert(ATy && "Expecting array type if not a pointer type.");
7284 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7285 }
7286
7287 // If we don't have a length at this point, that is because we have an
7288 // array section with a single element.
7289 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7290 return ElemSize;
7291
7292 if (const Expr *LenExpr = OAE->getLength()) {
7293 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7294 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7295 CGF.getContext().getSizeType(),
7296 LenExpr->getExprLoc());
7297 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7298 }
7299 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7300 OAE->getLowerBound() && "expected array_section[lb:].");
7301 // Size = sizeof(base) - lb * sizeof(element);
7302 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7303 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7304 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7305 CGF.getContext().getSizeType(),
7306 OAE->getLowerBound()->getExprLoc());
7307 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7308 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7309 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7310 LengthVal = CGF.Builder.CreateSelect(
7311 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7312 return LengthVal;
7313 }
7314 return CGF.getTypeSize(ExprTy);
7315 }
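  // For illustration, sizes computed above for hypothetical declarations
  // 'double *p;' and 'double a[100];':
  //
  //   map(p[lb:len]) -> len * sizeof(double)
  //   map(a[:])      -> sizeof(a)              // whole base: 800 bytes
  //   map(a[2:1])    -> 1 * sizeof(double)
  //   map(a[lb:])    -> max(sizeof(a) - lb * sizeof(double), 0)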
7316
7317 /// Return the corresponding bits for a given map clause modifier. Add
7318 /// a flag marking the map as a pointer if requested. Add a flag marking the
7319 /// map as the first one of a series of maps that relate to the same map
7320 /// expression.
7321 OpenMPOffloadMappingFlags getMapTypeBits(
7322 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7323 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7324 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7325 OpenMPOffloadMappingFlags Bits =
7326 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7327 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7328 switch (MapType) {
7329 case OMPC_MAP_alloc:
7330 case OMPC_MAP_release:
7331 // alloc and release are the default behavior in the runtime library, i.e.
7332 // if we don't pass any bits, alloc/release is what the runtime is
7333 // going to do. Therefore, we don't need to signal anything for these two
7334 // type modifiers.
7335 break;
7336 case OMPC_MAP_to:
7337 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7338 break;
7339 case OMPC_MAP_from:
7340 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7341 break;
7342 case OMPC_MAP_tofrom:
7343 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7344 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7345 break;
7346 case OMPC_MAP_delete:
7347 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7348 break;
7349 case OMPC_MAP_unknown:
7350 llvm_unreachable("Unexpected map type!");
7351 }
7352 if (AddPtrFlag)
7353 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7354 if (AddIsTargetParamFlag)
7355 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7356 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7357 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7358 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7359 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7360 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7361 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7362 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7363 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7364 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7365 if (IsNonContiguous)
7366 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7367 return Bits;
7368 }
7369
7370 /// Return true if the provided expression is a final array section. A
7371 /// final array section is one whose length can't be proven to be one.
7372 bool isFinalArraySectionExpression(const Expr *E) const {
7373 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7374
7375 // It is not an array section and therefore not a unity-size one.
7376 if (!OASE)
7377 return false;
7378
7379 // An array section with no colon always refers to a single element.
7380 if (OASE->getColonLocFirst().isInvalid())
7381 return false;
7382
7383 const Expr *Length = OASE->getLength();
7384
7385 // If we don't have a length, we have to check if the array has size 1
7386 // for this dimension. Also, we should always expect a length if the
7387 // base type is a pointer.
7388 if (!Length) {
7389 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7390 OASE->getBase()->IgnoreParenImpCasts())
7391 .getCanonicalType();
7392 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7393 return ATy->getSExtSize() != 1;
7394 // If we don't have a constant dimension length, we have to consider
7395 // the current section as having any size, so it is not necessarily
7396 // unitary. If it happens to be unity size, that's the user's fault.
7397 return true;
7398 }
7399
7400 // Check if the length evaluates to 1.
7401 Expr::EvalResult Result;
7402 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7403 return true; // Can have size greater than 1.
7404
7405 llvm::APSInt ConstLength = Result.Val.getInt();
7406 return ConstLength.getSExtValue() != 1;
7407 }
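  // For illustration (assuming 'int a[8];' and a non-constant 'n'):
  //
  //   a[1]    -> not an array section           -> false
  //   a[0:1]  -> length provably 1              -> false
  //   a[0:n]  -> length not provably 1          -> true
  //   a[7:]   -> no length, dimension size != 1 -> true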
7408
7409 /// A helper class to copy structures with overlapped elements, i.e. those
7410 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7411 /// are not explicitly copied have mapping nodes synthesized for them,
7412 /// taking care to avoid generating zero-sized copies.
7413 class CopyOverlappedEntryGaps {
7414 CodeGenFunction &CGF;
7415 MapCombinedInfoTy &CombinedInfo;
7416 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7417 const ValueDecl *MapDecl = nullptr;
7418 const Expr *MapExpr = nullptr;
7419 Address BP = Address::invalid();
7420 bool IsNonContiguous = false;
7421 uint64_t DimSize = 0;
7422 // These elements track the position as the struct is iterated over
7423 // (in order of increasing element address).
7424 const RecordDecl *LastParent = nullptr;
7425 uint64_t Cursor = 0;
7426 unsigned LastIndex = -1u;
7427 Address LB = Address::invalid();
7428
7429 public:
7430 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7431 MapCombinedInfoTy &CombinedInfo,
7432 OpenMPOffloadMappingFlags Flags,
7433 const ValueDecl *MapDecl, const Expr *MapExpr,
7434 Address BP, Address LB, bool IsNonContiguous,
7435 uint64_t DimSize)
7436 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7437 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7438 DimSize(DimSize), LB(LB) {}
7439
7440 void processField(
7441 const OMPClauseMappableExprCommon::MappableComponent &MC,
7442 const FieldDecl *FD,
7443 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7444 EmitMemberExprBase) {
7445 const RecordDecl *RD = FD->getParent();
7446 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7447 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7448 uint64_t FieldSize =
7449 CGF.getContext().getTypeSize(FD->getType());
7450 Address ComponentLB = Address::invalid();
7451
7452 if (FD->getType()->isLValueReferenceType()) {
7453 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7454 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7455 ComponentLB =
7456 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7457 } else {
7458 ComponentLB =
7459 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
7460 }
7461
7462 if (!LastParent)
7463 LastParent = RD;
7464 if (FD->getParent() == LastParent) {
7465 if (FD->getFieldIndex() != LastIndex + 1)
7466 copyUntilField(FD, ComponentLB);
7467 } else {
7468 LastParent = FD->getParent();
7469 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7470 copyUntilField(FD, ComponentLB);
7471 }
7472 Cursor = FieldOffset + FieldSize;
7473 LastIndex = FD->getFieldIndex();
7474 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7475 }
7476
7477 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7478 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7479 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7480 llvm::Value *Size =
7481 CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
7482 copySizedChunk(LBPtr, Size);
7483 }
7484
7485 void copyUntilEnd(Address HB) {
7486 if (LastParent) {
7487 const ASTRecordLayout &RL =
7488 CGF.getContext().getASTRecordLayout(LastParent);
7489 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7490 return;
7491 }
7492 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7493 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7494 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7495 LBPtr);
7496 copySizedChunk(LBPtr, Size);
7497 }
7498
7499 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7500 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7501 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7502 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7503 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7504 CombinedInfo.Pointers.push_back(Base);
7505 CombinedInfo.Sizes.push_back(
7506 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7507 CombinedInfo.Types.push_back(Flags);
7508 CombinedInfo.Mappers.push_back(nullptr);
7509 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7510 }
7511 };
7512
7513 /// Generate the base pointers, section pointers, sizes, map type bits, and
7514 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7515 /// map type, map or motion modifiers, and expression components.
7516 /// \a IsFirstComponent should be set to true if the provided set of
7517 /// components is the first associated with a capture.
7518 void generateInfoForComponentList(
7519 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7520 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7521 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7522 MapCombinedInfoTy &CombinedInfo,
7523 MapCombinedInfoTy &StructBaseCombinedInfo,
7524 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7525 bool IsImplicit, bool GenerateAllInfoForClauses,
7526 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7527 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7528 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7529 OverlappedElements = {},
7530 bool AreBothBasePtrAndPteeMapped = false) const {
7531 // The following summarizes what has to be generated for each map and the
7532 // types below. The generated information is expressed in this order:
7533 // base pointer, section pointer, size, flags
7534 // (to add to the ones that come from the map type and modifier).
7535 //
7536 // double d;
7537 // int i[100];
7538 // float *p;
7539 // int **a = &i;
7540 //
7541 // struct S1 {
7542 // int i;
7543 // float f[50];
7544 // }
7545 // struct S2 {
7546 // int i;
7547 // float f[50];
7548 // S1 s;
7549 // double *p;
7550 // struct S2 *ps;
7551 // int &ref;
7552 // }
7553 // S2 s;
7554 // S2 *ps;
7555 //
7556 // map(d)
7557 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7558 //
7559 // map(i)
7560 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7561 //
7562 // map(i[1:23])
7563 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7564 //
7565 // map(p)
7566 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7567 //
7568 // map(p[1:24])
7569 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7570 // in unified shared memory mode or for local pointers
7571 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7572 //
7573 // map((*a)[0:3])
7574 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7575 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7576 //
7577 // map(**a)
7578 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7579 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7580 //
7581 // map(s)
7582 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7583 //
7584 // map(s.i)
7585 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7586 //
7587 // map(s.s.f)
7588 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7589 //
7590 // map(s.p)
7591 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7592 //
7593 // map(to: s.p[:22])
7594 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7595 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7596 // &(s.p), &(s.p[0]), 22*sizeof(double),
7597 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7598 // (*) alloc space for struct members, only this is a target parameter
7599 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7600 // optimizes this entry out, same in the examples below)
7601 // (***) map the pointee (map: to)
7602 //
7603 // map(to: s.ref)
7604 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7605 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7606 // (*) alloc space for struct members, only this is a target parameter
7607 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7608 // optimizes this entry out, same in the examples below)
7609 // (***) map the pointee (map: to)
7610 //
7611 // map(s.ps)
7612 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7613 //
7614 // map(from: s.ps->s.i)
7615 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7616 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7617 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7618 //
7619 // map(to: s.ps->ps)
7620 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7621 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7622 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7623 //
7624 // map(s.ps->ps->ps)
7625 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7626 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7627 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7628 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7629 //
7630 // map(to: s.ps->ps->s.f[:22])
7631 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7632 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7633 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7634 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7635 //
7636 // map(ps)
7637 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7638 //
7639 // map(ps->i)
7640 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7641 //
7642 // map(ps->s.f)
7643 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7644 //
7645 // map(from: ps->p)
7646 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7647 //
7648 // map(to: ps->p[:22])
7649 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7650 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7651 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7652 //
7653 // map(ps->ps)
7654 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7655 //
7656 // map(from: ps->ps->s.i)
7657 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7658 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7659 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7660 //
7661 // map(from: ps->ps->ps)
7662 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7663 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7664 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7665 //
7666 // map(ps->ps->ps->ps)
7667 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7668 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7669 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7670 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7671 //
7672 // map(to: ps->ps->ps->s.f[:22])
7673 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7674 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7675 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7676 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7677 //
7678 // map(to: s.f[:22]) map(from: s.p[:33])
7679 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7680 // sizeof(double*) (*), TARGET_PARAM
7681 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7682 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7683 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7684 // (*) allocate contiguous space needed to fit all mapped members even if
7685 // we allocate space for members not mapped (in this example,
7686 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7687 // them as well because they fall between &s.f[0] and &s.p)
7688 //
7689 // map(from: s.f[:22]) map(to: ps->p[:33])
7690 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7691 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7692 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7693 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7694 // (*) the struct this entry pertains to is the 2nd element in the list of
7695 // arguments, hence MEMBER_OF(2)
7696 //
7697 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7698 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7699 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7700 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7701 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7702 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7703 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7704 // (*) the struct this entry pertains to is the 4th element in the list
7705 // of arguments, hence MEMBER_OF(4)
7706 //
7707 // map(p, p[:100])
7708 // ===> map(p[:100])
7709 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7710
7711 // Track if the map information being generated is the first for a capture.
7712 bool IsCaptureFirstInfo = IsFirstComponentList;
7713 // When the variable is in a 'declare target link' clause, or in a 'to'
7714 // clause with unified shared memory, a reference is needed to hold the
7715 // host/device address of the variable.
7716 bool RequiresReference = false;
7717
7718 // Scan the components from the base to the complete expression.
7719 auto CI = Components.rbegin();
7720 auto CE = Components.rend();
7721 auto I = CI;
7722
7723 // Track if the map information being generated is the first for a list of
7724 // components.
7725 bool IsExpressionFirstInfo = true;
7726 bool FirstPointerInComplexData = false;
7727 Address BP = Address::invalid();
7728 const Expr *AssocExpr = I->getAssociatedExpression();
7729 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7730 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7731 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7732
7733 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7734 return;
7735 if (isa<MemberExpr>(AssocExpr)) {
7736 // The base is the 'this' pointer. The content of the pointer is going
7737 // to be the base of the field being mapped.
7738 BP = CGF.LoadCXXThisAddress();
7739 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7740 (OASE &&
7741 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7742 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7743 } else if (OAShE &&
7744 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7745 BP = Address(
7746 CGF.EmitScalarExpr(OAShE->getBase()),
7747 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7748 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7749 } else {
7750 // The base is the reference to the variable.
7751 // BP = &Var.
7752 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7753 if (const auto *VD =
7754 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7755 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7756 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7757 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7758 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7759 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7760 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7761 RequiresReference = true;
7762 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7763 }
7764 }
7765 }
7766
7767 // If the variable is a pointer and is being dereferenced (i.e. is not
7768 // the last component), the base has to be the pointer itself, not its
7769 // reference. References are ignored for mapping purposes.
7770 QualType Ty =
7771 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7772 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7773 // No need to generate individual map information for the pointer, it
7774 // can be associated with the combined storage if shared memory mode is
7775 // active or the base declaration is not a global variable.
7776 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7777 if (!AreBothBasePtrAndPteeMapped &&
7778 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7779 !VD || VD->hasLocalStorage()))
7780 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7781 else
7782 FirstPointerInComplexData = true;
7783 ++I;
7784 }
7785 }
7786
7787 // Track whether a component of the list should be marked as MEMBER_OF some
7788 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7789 // in a component list should be marked as MEMBER_OF; all subsequent entries
7790 // do not belong to the base struct. E.g.
7791 // struct S2 s;
7792 // s.ps->ps->ps->f[:]
7793 // (1) (2) (3) (4)
7794 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7795 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7796 // is the pointee of ps(2), which is not a member of struct s, so it should
7797 // not be marked as such (it is still PTR_AND_OBJ).
7798 // The variable is initialized to false so that PTR_AND_OBJ entries which
7799 // are not struct members are not considered (e.g. array of pointers to
7800 // data).
7801 bool ShouldBeMemberOf = false;
7802
7803 // Variable keeping track of whether or not we have encountered a component
7804 // in the component list which is a member expression. Useful when we have a
7805 // pointer or a final array section, in which case it is the previous
7806 // component in the list which tells us whether we have a member expression.
7807 // E.g. X.f[:]
7808 // While processing the final array section "[:]" it is "f" which tells us
7809 // whether we are dealing with a member of a declared struct.
7810 const MemberExpr *EncounteredME = nullptr;
7811
7812 // Track the total number of dimensions. Start from one for the dummy
7813 // dimension.
7814 uint64_t DimSize = 1;
7815
7816 // Detects non-contiguous updates due to strided accesses.
7817 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
7818 // correctly when generating information to be passed to the runtime.
7819 // The flag is set to true if any array section has a constant stride that
7820 // is not equal to one; sections with no stride, or whose stride is not an
7821 // integer constant expression, are not flagged here.
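// For example, with 'int arr[8]':
//   arr[0:4:2] - constant stride 2 -> flagged non-contiguous
//   arr[0:4]   - no stride         -> contiguous
//   arr[0:4:1] - constant stride 1 -> contiguous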
7822 bool IsNonContiguous =
7823 CombinedInfo.NonContigInfo.IsNonContiguous ||
7824 any_of(Components, [&](const auto &Component) {
7825 const auto *OASE =
7826 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
7827 if (!OASE)
7828 return false;
7829
7830 const Expr *StrideExpr = OASE->getStride();
7831 if (!StrideExpr)
7832 return false;
7833
7834 const auto Constant =
7835 StrideExpr->getIntegerConstantExpr(CGF.getContext());
7836 if (!Constant)
7837 return false;
7838
7839 return !Constant->isOne();
7840 });
7841
7842 bool IsPrevMemberReference = false;
7843
7844 bool IsPartialMapped =
7845 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7846
7847 // We need to check if we will be encountering any member expressions
7848 // (MEs). If none is found, it means we will be mapping the whole struct.
7849 // In that case we need to skip adding an entry for the struct to the
7850 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7851 // list only when generating all info for clauses.
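// E.g. 'map(s)' has no member expression among its components, so the
// whole-struct entry is routed to StructBaseCombinedInfo, whereas
// 'map(s.i)' contains a MemberExpr and is emitted through CombinedInfo.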
7852 bool IsMappingWholeStruct = true;
7853 if (!GenerateAllInfoForClauses) {
7854 IsMappingWholeStruct = false;
7855 } else {
7856 for (auto TempI = I; TempI != CE; ++TempI) {
7857 const MemberExpr *PossibleME =
7858 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7859 if (PossibleME) {
7860 IsMappingWholeStruct = false;
7861 break;
7862 }
7863 }
7864 }
7865
7866 for (; I != CE; ++I) {
7867 // If the current component is member of a struct (parent struct) mark it.
7868 if (!EncounteredME) {
7869 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7870 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7871 // as MEMBER_OF the parent struct.
7872 if (EncounteredME) {
7873 ShouldBeMemberOf = true;
7874 // Do not emit as complex pointer if this is actually not array-like
7875 // expression.
7876 if (FirstPointerInComplexData) {
7877 QualType Ty = std::prev(I)
7878 ->getAssociatedDeclaration()
7879 ->getType()
7880 .getNonReferenceType();
7881 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7882 FirstPointerInComplexData = false;
7883 }
7884 }
7885 }
7886
7887 auto Next = std::next(I);
7888
7889 // We need to generate the addresses and sizes if this is the last
7890 // component, if the component is a pointer or if it is an array section
7891 // whose length can't be proved to be one. If this is a pointer, it
7892 // becomes the base address for the following components.
7893
7894 // A final array section is one whose length can't be proved to be one.
7895 // If the map item is non-contiguous then we don't treat any array section
7896 // as a final array section.
7897 bool IsFinalArraySection =
7898 !IsNonContiguous &&
7899 isFinalArraySectionExpression(I->getAssociatedExpression());
7900
7901 // If we have a declaration for the mapping use that, otherwise use
7902 // the base declaration of the map clause.
7903 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7904 ? I->getAssociatedDeclaration()
7905 : BaseDecl;
7906 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7907 : MapExpr;
7908
7909 // Get information on whether the element is a pointer. We have to
7910 // treat array sections specially, given that they are built-in
7911 // types.
7912 const auto *OASE =
7913 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7914 const auto *OAShE =
7915 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7916 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7917 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7918 bool IsPointer =
7919 OAShE ||
7920 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7921 .getCanonicalType()
7922 ->isAnyPointerType()) ||
7923 I->getAssociatedExpression()->getType()->isAnyPointerType();
7924 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7925 MapDecl &&
7926 MapDecl->getType()->isLValueReferenceType();
7927 bool IsNonDerefPointer = IsPointer &&
7928 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7929 !IsNonContiguous;
7930
7931 if (OASE)
7932 ++DimSize;
7933
7934 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7935 IsFinalArraySection) {
7936 // If this is not the last component, we expect the pointer to be
7937 // associated with an array expression or member expression.
7938 assert((Next == CE ||
7939 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7940 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7941 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7942 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7943 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7944 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7945 "Unexpected expression");
7946
7947 Address LB = Address::invalid();
7948 Address LowestElem = Address::invalid();
7949 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7950 const MemberExpr *E) {
7951 const Expr *BaseExpr = E->getBase();
7952 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7953 // scalar.
7954 LValue BaseLV;
7955 if (E->isArrow()) {
7956 LValueBaseInfo BaseInfo;
7957 TBAAAccessInfo TBAAInfo;
7958 Address Addr =
7959 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7960 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7961 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7962 } else {
7963 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7964 }
7965 return BaseLV;
7966 };
7967 if (OAShE) {
7968 LowestElem = LB =
7969 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7970 CGF.ConvertTypeForMem(
7971 OAShE->getBase()->getType()->getPointeeType()),
7972 CGF.getContext().getTypeAlignInChars(
7973 OAShE->getBase()->getType()));
7974 } else if (IsMemberReference) {
7975 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7976 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7977 LowestElem = CGF.EmitLValueForFieldInitialization(
7978 BaseLVal, cast<FieldDecl>(MapDecl))
7979 .getAddress();
7980 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7981 .getAddress();
7982 } else {
7983 LowestElem = LB =
7984 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7985 .getAddress();
7986 }
7987
7988 // If this component is a pointer inside the base struct then we don't
7989 // need to create any entry for it - it will be combined with the object
7990 // it is pointing to into a single PTR_AND_OBJ entry.
7991 bool IsMemberPointerOrAddr =
7992 EncounteredME &&
7993 (((IsPointer || ForDeviceAddr) &&
7994 I->getAssociatedExpression() == EncounteredME) ||
7995 (IsPrevMemberReference && !IsPointer) ||
7996 (IsMemberReference && Next != CE &&
7997 !Next->getAssociatedExpression()->getType()->isPointerType()));
7998 if (!OverlappedElements.empty() && Next == CE) {
7999 // Handle base element with the info for overlapped elements.
8000 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8001 assert(!IsPointer &&
8002 "Unexpected base element with the pointer type.");
8003 // Mark the whole struct as the struct that requires allocation on the
8004 // device.
8005 PartialStruct.LowestElem = {0, LowestElem};
8006 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8007 I->getAssociatedExpression()->getType());
8008 Address HB = CGF.Builder.CreateConstGEP(
8009 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8010 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8011 TypeSize.getQuantity() - 1);
8012 PartialStruct.HighestElem = {
8013 std::numeric_limits<decltype(
8014 PartialStruct.HighestElem.first)>::max(),
8015 HB};
8016 PartialStruct.Base = BP;
8017 PartialStruct.LB = LB;
8018 assert(
8019 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8020 "Overlapped elements must be used only once for the variable.");
8021 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8022 // Emit data for non-overlapped data.
8023 OpenMPOffloadMappingFlags Flags =
8024 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8025 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8026 /*AddPtrFlag=*/false,
8027 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8028 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8029 MapExpr, BP, LB, IsNonContiguous,
8030 DimSize);
8031 // Do bitcopy of all non-overlapped structure elements.
8032 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8033 Component : OverlappedElements) {
8034 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8035 Component) {
8036 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8037 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
8038 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8039 }
8040 }
8041 }
8042 }
8043 CopyGaps.copyUntilEnd(HB);
8044 break;
8045 }
8046 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8047 // Skip adding an entry in the CurInfo of this combined entry if the
8048 // whole struct is currently being mapped. The struct needs to be added
8049 // in the first position before any data internal to the struct is being
8050 // mapped.
8051 // Skip adding an entry in the CurInfo of this combined entry if the
8052 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8053 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8054 (Next == CE && MapType != OMPC_MAP_unknown)) {
8055 if (!IsMappingWholeStruct) {
8056 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8057 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
8058 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8059 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8060 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8061 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8062 Size, CGF.Int64Ty, /*isSigned=*/true));
8063 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8064 : 1);
8065 } else {
8066 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8067 StructBaseCombinedInfo.BasePointers.push_back(
8068 BP.emitRawPointer(CGF));
8069 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
8070 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8071 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8072 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8073 Size, CGF.Int64Ty, /*isSigned=*/true));
8074 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8075 IsNonContiguous ? DimSize : 1);
8076 }
8077
8078 // If Mapper is valid, the last component inherits the mapper.
8079 bool HasMapper = Mapper && Next == CE;
8080 if (!IsMappingWholeStruct)
8081 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8082 else
8083 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
8084 : nullptr);
8085
8086 // We need to add a pointer flag for each map that comes from the
8087 // same expression except for the first one. We also need to signal
8088 // this map is the first one that relates with the current capture
8089 // (there is a set of entries for each capture).
8090 OpenMPOffloadMappingFlags Flags =
8091 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8092 !IsExpressionFirstInfo || RequiresReference ||
8093 FirstPointerInComplexData || IsMemberReference,
8094 AreBothBasePtrAndPteeMapped ||
8095 (IsCaptureFirstInfo && !RequiresReference),
8096 IsNonContiguous);
8097
8098 if (!IsExpressionFirstInfo || IsMemberReference) {
8099 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8100 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8101 if (IsPointer || (IsMemberReference && Next != CE))
8102 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8103 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8104 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8105 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8106 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8107
8108 if (ShouldBeMemberOf) {
8109 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8110 // should be later updated with the correct value of MEMBER_OF.
8111 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8112 // From now on, all subsequent PTR_AND_OBJ entries should not be
8113 // marked as MEMBER_OF.
8114 ShouldBeMemberOf = false;
8115 }
8116 }
8117
8118 if (!IsMappingWholeStruct)
8119 CombinedInfo.Types.push_back(Flags);
8120 else
8121 StructBaseCombinedInfo.Types.push_back(Flags);
8122 }
8123
8124 // If we have encountered a member expression so far, keep track of the
8125 // mapped member. If the parent is "*this", then the value declaration
8126 // is nullptr.
8127 if (EncounteredME) {
8128 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8129 unsigned FieldIndex = FD->getFieldIndex();
8130
8131 // Update info about the lowest and highest elements for this struct
8132 if (!PartialStruct.Base.isValid()) {
8133 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8134 if (IsFinalArraySection && OASE) {
8135 Address HB =
8136 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8137 .getAddress();
8138 PartialStruct.HighestElem = {FieldIndex, HB};
8139 } else {
8140 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8141 }
8142 PartialStruct.Base = BP;
8143 PartialStruct.LB = BP;
8144 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8145 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8146 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8147 if (IsFinalArraySection && OASE) {
8148 Address HB =
8149 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8150 .getAddress();
8151 PartialStruct.HighestElem = {FieldIndex, HB};
8152 } else {
8153 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8154 }
8155 }
8156 }
8157
8158 // Need to emit combined struct for array sections.
8159 if (IsFinalArraySection || IsNonContiguous)
8160 PartialStruct.IsArraySection = true;
8161
8162 // If we have a final array section, we are done with this expression.
8163 if (IsFinalArraySection)
8164 break;
8165
8166 // The pointer becomes the base for the next element.
8167 if (Next != CE)
8168 BP = IsMemberReference ? LowestElem : LB;
8169 if (!IsPartialMapped)
8170 IsExpressionFirstInfo = false;
8171 IsCaptureFirstInfo = false;
8172 FirstPointerInComplexData = false;
8173 IsPrevMemberReference = IsMemberReference;
8174 } else if (FirstPointerInComplexData) {
8175 QualType Ty = Components.rbegin()
8176 ->getAssociatedDeclaration()
8177 ->getType()
8178 .getNonReferenceType();
8179 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8180 FirstPointerInComplexData = false;
8181 }
8182 }
8183 // If we ran over the whole component list, allocate the space for the
8184 // whole record.
8185 if (!EncounteredME)
8186 PartialStruct.HasCompleteRecord = true;
8187
8188 if (!IsNonContiguous)
8189 return;
8190
8191 const ASTContext &Context = CGF.getContext();
8192
8193 // To support strides in array sections, we need to initialize the first
8194 // dimension size as 1, the first offset as 0, and the first count as 1.
8195 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8196 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8197 MapValuesArrayTy CurStrides;
8198 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8199 uint64_t ElementTypeSize;
8200
8201 // Collect Size information for each dimension and get the element size as
8202 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8203 // should be [10, 10] and the first stride is 4 bytes.
8204 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8205 Components) {
8206 const Expr *AssocExpr = Component.getAssociatedExpression();
8207 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8208
8209 if (!OASE)
8210 continue;
8211
8212 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
8213 auto *CAT = Context.getAsConstantArrayType(Ty);
8214 auto *VAT = Context.getAsVariableArrayType(Ty);
8215
8216 // We need all the dimension sizes except for the last dimension.
8217 assert((VAT || CAT || &Component == &*Components.begin()) &&
8218 "Should be either ConstantArray or VariableArray if not the "
8219 "first Component");
8220
8221 // Get element size if CurStrides is empty.
8222 if (CurStrides.empty()) {
8223 const Type *ElementType = nullptr;
8224 if (CAT)
8225 ElementType = CAT->getElementType().getTypePtr();
8226 else if (VAT)
8227 ElementType = VAT->getElementType().getTypePtr();
8228 else
8229 assert(&Component == &*Components.begin() &&
8230 "Only expect pointer (non CAT or VAT) when this is the "
8231 "first Component");
8232 // If ElementType is null, then it means the base is a pointer
8233 // (neither CAT nor VAT) and we'll attempt to get ElementType again
8234 // for the next iteration.
8235 if (ElementType) {
8236 // For the case of having a pointer as the base, we need to remove one
8237 // level of indirection.
8238 if (&Component != &*Components.begin())
8239 ElementType = ElementType->getPointeeOrArrayElementType();
8240 ElementTypeSize =
8241 Context.getTypeSizeInChars(ElementType).getQuantity();
8242 CurStrides.push_back(
8243 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8244 }
8245 }
8246 // Get the dimension value, except for the last dimension since we don't
8247 // need it.
8248 if (DimSizes.size() < Components.size() - 1) {
8249 if (CAT)
8250 DimSizes.push_back(
8251 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
8252 else if (VAT)
8253 DimSizes.push_back(CGF.Builder.CreateIntCast(
8254 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8255 /*IsSigned=*/false));
8256 }
8257 }
8258
8259 // Skip the dummy dimension since we already have its information.
8260 auto *DI = DimSizes.begin() + 1;
8261 // Running product of the dimension sizes.
8262 llvm::Value *DimProd =
8263 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8264
8265 // Collect info for non-contiguous maps. Note that offset, count, and
8266 // stride are only meaningful for array sections and array subscripts, so
8267 // any other component is skipped.
8268 // Also, the sizes of the offset, count, and stride lists are not the same
8269 // as those of the pointers, base_pointers, sizes, or dims lists. Instead,
8270 // they match the number of non-contiguous declarations in the 'target
8271 // update' to/from clause.
8272 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8273 Components) {
8274 const Expr *AssocExpr = Component.getAssociatedExpression();
8275
8276 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8277 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8278 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8279 /*isSigned=*/false);
8280 CurOffsets.push_back(Offset);
8281 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8282 CurStrides.push_back(CurStrides.back());
8283 continue;
8284 }
8285
8286 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8287
8288 if (!OASE)
8289 continue;
8290
8291 // Offset
8292 const Expr *OffsetExpr = OASE->getLowerBound();
8293 llvm::Value *Offset = nullptr;
8294 if (!OffsetExpr) {
8295 // If offset is absent, then we just set it to zero.
8296 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8297 } else {
8298 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8299 CGF.Int64Ty,
8300 /*isSigned=*/false);
8301 }
8302 CurOffsets.push_back(Offset);
8303
8304 // Count
8305 const Expr *CountExpr = OASE->getLength();
8306 llvm::Value *Count = nullptr;
8307 if (!CountExpr) {
8308 // In Clang, once a higher dimension is an array section, all the lower
8309 // dimensions are constructed as array sections too. However, for a case
8310 // like arr[0:2][2], Clang constructs the inner dimension as an array
8311 // section even though it is not one according to the spec.
8312 if (!OASE->getColonLocFirst().isValid() &&
8313 !OASE->getColonLocSecond().isValid()) {
8314 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8315 } else {
8316 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8317 // When the length is absent it defaults to ⌈(size −
8318 // lower-bound)/stride⌉, where size is the size of the array
8319 // dimension.
8320 const Expr *StrideExpr = OASE->getStride();
8321 llvm::Value *Stride =
8322 StrideExpr
8323 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8324 CGF.Int64Ty, /*isSigned=*/false)
8325 : nullptr;
8326 if (Stride)
8327 Count = CGF.Builder.CreateUDiv(
8328 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8329 else
8330 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8331 }
8332 } else {
8333 Count = CGF.EmitScalarExpr(CountExpr);
8334 }
8335 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8336 CurCounts.push_back(Count);
8337
8338 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8339 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8340 // Offset Count Stride
8341 // D0 0 1 4 (int) <- dummy dimension
8342 // D1 0 2 8 (2 * (1) * 4)
8343 // D2 1 2 20 (1 * (1 * 5) * 4)
8344 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8345 const Expr *StrideExpr = OASE->getStride();
8346 llvm::Value *Stride =
8347 StrideExpr
8348 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8349 CGF.Int64Ty, /*isSigned=*/false)
8350 : nullptr;
8351 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8352 if (Stride)
8353 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8354 else
8355 CurStrides.push_back(DimProd);
8356 if (DI != DimSizes.end())
8357 ++DI;
8358 }
8359
8360 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8361 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8362 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8363 }
8364
8365 /// Return the adjusted map modifiers if the declaration a capture refers to
8366 /// appears in a first-private clause. This is expected to be used only with
8367 /// directives that start with 'target'.
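 /// For illustration: given 'int *p; int x;' and
 ///   #pragma omp target firstprivate(p, x)
 /// 'p' is mapped with OMP_MAP_TO | OMP_MAP_PTR_AND_OBJ, while 'x' gets
 /// OMP_MAP_PRIVATE | OMP_MAP_TO.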
8368 OpenMPOffloadMappingFlags
8369 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8370 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8371
8372 // A first private variable captured by reference will use only the
8373 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8374 // declaration is known as first-private in this handler.
8375 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8376 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8377 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8378 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8379 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8380 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8381 }
8382 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8383 if (I != LambdasMap.end())
8384 // For map(to: lambda): use the user-specified map type.
8385 return getMapTypeBits(
8386 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8387 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8388 /*AddPtrFlag=*/false,
8389 /*AddIsTargetParamFlag=*/false,
8390 /*isNonContiguous=*/false);
8391 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8392 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8393 }
8394
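 /// Collects in \p Layout the non-empty base subobjects and non-bitfield
 /// fields of \p RD in LLVM field order. For illustration, given
 ///   struct A { int a; };
 ///   struct B : A { int b; };
 /// calling getPlainLayout on B yields [A::a, B::b], recursing into the
 /// base subobject before appending B's own fields.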
8395 void getPlainLayout(const CXXRecordDecl *RD,
8396 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8397 bool AsBase) const {
8398 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8399
8400 llvm::StructType *St =
8401 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8402
8403 unsigned NumElements = St->getNumElements();
8404 llvm::SmallVector<
8405 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8406 RecordLayout(NumElements);
8407
8408 // Fill bases.
8409 for (const auto &I : RD->bases()) {
8410 if (I.isVirtual())
8411 continue;
8412
8413 QualType BaseTy = I.getType();
8414 const auto *Base = BaseTy->getAsCXXRecordDecl();
8415 // Ignore empty bases.
8416 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8417 CGF.getContext()
8418 .getASTRecordLayout(Base)
8419 .getNonVirtualSize()
8420 .isZero())
8421 continue;
8422
8423 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8424 RecordLayout[FieldIndex] = Base;
8425 }
8426 // Fill in virtual bases.
8427 for (const auto &I : RD->vbases()) {
8428 QualType BaseTy = I.getType();
8429 // Ignore empty bases.
8430 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8431 continue;
8432
8433 const auto *Base = BaseTy->getAsCXXRecordDecl();
8434 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8435 if (RecordLayout[FieldIndex])
8436 continue;
8437 RecordLayout[FieldIndex] = Base;
8438 }
8439 // Fill in all the fields.
8440 assert(!RD->isUnion() && "Unexpected union.");
8441 for (const auto *Field : RD->fields()) {
8442 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8443 // will fill in later.)
8444 if (!Field->isBitField() &&
8445 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8446 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8447 RecordLayout[FieldIndex] = Field;
8448 }
8449 }
8450 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8451 &Data : RecordLayout) {
8452 if (Data.isNull())
8453 continue;
8454 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8455 getPlainLayout(Base, Layout, /*AsBase=*/true);
8456 else
8457 Layout.push_back(cast<const FieldDecl *>(Data));
8458 }
8459 }
8460
8461 /// Returns the address corresponding to \p PointerExpr.
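 /// For illustration, the attach pointer can appear in several forms:
 ///   map(p[0:n])     -> 'p'    (DeclRefExpr)
 ///   map(s.p[0:n])   -> 's.p'  (MemberExpr)
 ///   map(p[i][0:n])  -> 'p[i]' (ArraySubscriptExpr)
 ///   map((*pp)[0:n]) -> '*pp'  (UnaryOperator, dereference)
 /// or a unit-size section such as 'p[i:1]' (ArraySectionExpr).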
8462 static Address getAttachPtrAddr(const Expr *PointerExpr,
8463 CodeGenFunction &CGF) {
8464 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8465 Address AttachPtrAddr = Address::invalid();
8466
8467 if (auto *DRE = dyn_cast<DeclRefExpr>(PointerExpr)) {
8468 // If the pointer is a variable, we can use its address directly.
8469 AttachPtrAddr = CGF.EmitLValue(DRE).getAddress();
8470 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(PointerExpr)) {
8471 AttachPtrAddr =
8472 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/true).getAddress();
8473 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(PointerExpr)) {
8474 AttachPtrAddr = CGF.EmitLValue(ASE).getAddress();
8475 } else if (auto *ME = dyn_cast<MemberExpr>(PointerExpr)) {
8476 AttachPtrAddr = CGF.EmitMemberExpr(ME).getAddress();
8477 } else if (auto *UO = dyn_cast<UnaryOperator>(PointerExpr)) {
8478 assert(UO->getOpcode() == UO_Deref &&
8479 "Unexpected unary-operator on attach-ptr-expr");
8480 AttachPtrAddr = CGF.EmitLValue(UO).getAddress();
8481 }
8482 assert(AttachPtrAddr.isValid() &&
8483 "Failed to get address for attach pointer expression");
8484 return AttachPtrAddr;
8485 }
8486
8487 /// Get the address of the attach pointer, and a load from it, to get the
8488 /// pointee base address.
8489 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8490 /// contains invalid addresses if \p AttachPtrExpr is null.
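 /// E.g. for 'int *p' and attach-ptr expr 'p', the result is the pair
 /// (&p, p): the address of the pointer itself, and the loaded pointee
 /// base address.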
8491 static std::pair<Address, Address>
8492 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8493 CodeGenFunction &CGF) {
8494
8495 if (!AttachPtrExpr)
8496 return {Address::invalid(), Address::invalid()};
8497
8498 Address AttachPtrAddr = getAttachPtrAddr(AttachPtrExpr, CGF);
8499 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8500
8501 QualType AttachPtrType =
8502 OMPClauseMappableExprCommon::getComponentExprElementType(AttachPtrExpr)
8503 .getCanonicalType();
8504
8505 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8506 AttachPtrAddr, AttachPtrType->castAs<PointerType>());
8507 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8508
8509 return {AttachPtrAddr, AttachPteeBaseAddr};
8510 }
8511
8512 /// Returns whether an attach entry should be emitted for a map on
8513 /// \p MapBaseDecl in the directive \p CurDir.
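 /// For illustration: attach entries are emitted for map-entering
 /// directives such as 'target', 'target data', and 'target enter data'
 /// (and always for 'declare mapper'), but not for exiting directives
 /// like 'target exit data'.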
8514 static bool
8515 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8516 CodeGenFunction &CGF,
8517 llvm::PointerUnion<const OMPExecutableDirective *,
8518 const OMPDeclareMapperDecl *>
8519 CurDir) {
8520 if (!PointerExpr)
8521 return false;
8522
8523 // Pointer attachment is needed at map-entering time or for declare
8524 // mappers.
8525 return isa<const OMPDeclareMapperDecl *>(CurDir) ||
8526 isOpenMPTargetMapEnteringDirective(
8527 cast<const OMPExecutableDirective *>(CurDir)
8528 ->getDirectiveKind());
8529 }
8530
8531 /// Computes the attach-ptr expr for \p Components, and updates various maps
8532 /// with the information.
8533 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8534 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8535 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8536 /// AttachPtrExprMap.
8537 void collectAttachPtrExprInfo(
8538 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
8539 llvm::PointerUnion<const OMPExecutableDirective *,
8540 const OMPDeclareMapperDecl *>
8541 CurDir) {
8542
8543 OpenMPDirectiveKind CurDirectiveID =
8544 isa<const OMPDeclareMapperDecl *>(CurDir)
8545 ? OMPD_declare_mapper
8546 : cast<const OMPExecutableDirective *>(CurDir)->getDirectiveKind();
8547
8548 const auto &[AttachPtrExpr, Depth] =
8549 OMPClauseMappableExprCommon::findAttachPtrExpr(Components,
8550 CurDirectiveID);
8551
8552 AttachPtrComputationOrderMap.try_emplace(
8553 AttachPtrExpr, AttachPtrComputationOrderMap.size());
8554 AttachPtrComponentDepthMap.try_emplace(AttachPtrExpr, Depth);
8555 AttachPtrExprMap.try_emplace(Components, AttachPtrExpr);
8556 }
8557
8558 /// Generate all the base pointers, section pointers, sizes, map types, and
8559 /// mappers for the extracted mappable expressions (all included in \a
8560 /// CombinedInfo). Also, for each item that relates to a device pointer, a
8561 /// pair of the relevant declaration and index where it occurs is appended to
8562 /// the device pointers info array.
8563 void generateAllInfoForClauses(
8564 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8565 llvm::OpenMPIRBuilder &OMPBuilder,
8566 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8567 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8568 // We have to process the component lists that relate to the same
8569 // declaration in a single chunk so that we can generate the map flags
8570 // correctly. Therefore, we organize all lists in a map.
8571 enum MapKind { Present, Allocs, Other, Total };
8572 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8573 SmallVector<SmallVector<MapInfo, 8>, 4>>
8574 Info;
8575
8576 // Helper function to fill the information map for the different supported
8577 // clauses.
8578 auto &&InfoGen =
8579 [&Info, &SkipVarSet](
8580 const ValueDecl *D, MapKind Kind,
8581 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8582 OpenMPMapClauseKind MapType,
8583 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8584 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8585 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8586 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8587 if (SkipVarSet.contains(D))
8588 return;
8589 auto It = Info.try_emplace(D, Total).first;
8590 It->second[Kind].emplace_back(
8591 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8592 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8593 };
8594
8595 for (const auto *Cl : Clauses) {
8596 const auto *C = dyn_cast<OMPMapClause>(Cl);
8597 if (!C)
8598 continue;
8599 MapKind Kind = Other;
8600 if (llvm::is_contained(C->getMapTypeModifiers(),
8601 OMPC_MAP_MODIFIER_present))
8602 Kind = Present;
8603 else if (C->getMapType() == OMPC_MAP_alloc)
8604 Kind = Allocs;
8605 const auto *EI = C->getVarRefs().begin();
8606 for (const auto L : C->component_lists()) {
8607 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8608 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8609 C->getMapTypeModifiers(), {},
8610 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8611 E);
8612 ++EI;
8613 }
8614 }
8615 for (const auto *Cl : Clauses) {
8616 const auto *C = dyn_cast<OMPToClause>(Cl);
8617 if (!C)
8618 continue;
8619 MapKind Kind = Other;
8620 if (llvm::is_contained(C->getMotionModifiers(),
8621 OMPC_MOTION_MODIFIER_present))
8622 Kind = Present;
8623 const auto *EI = C->getVarRefs().begin();
8624 for (const auto L : C->component_lists()) {
8625 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8626 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8627 C->isImplicit(), std::get<2>(L), *EI);
8628 ++EI;
8629 }
8630 }
8631 for (const auto *Cl : Clauses) {
8632 const auto *C = dyn_cast<OMPFromClause>(Cl);
8633 if (!C)
8634 continue;
8635 MapKind Kind = Other;
8636 if (llvm::is_contained(C->getMotionModifiers(),
8637 OMPC_MOTION_MODIFIER_present))
8638 Kind = Present;
8639 const auto *EI = C->getVarRefs().begin();
8640 for (const auto L : C->component_lists()) {
8641 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8642 C->getMotionModifiers(),
8643 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8644 *EI);
8645 ++EI;
8646 }
8647 }
8648
8649 // Look at the use_device_ptr and use_device_addr clauses information and
8650 // mark the existing map entries as such. If there is no map information for
8651 // an entry in the use_device_ptr and use_device_addr list, we create one
8652 // with map type 'alloc' and a zero-size section. It is the user's fault if
8653 // that was not mapped before. If there is no map information and the
8654 // pointer is a struct member, then we defer the emission of that entry
8655 // until the whole struct has been processed.
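// For illustration:
//   #pragma omp target data map(tofrom: p[0:n]) use_device_ptr(p)
// reuses the existing map entry for 'p' and tags it RETURN_PARAM so the
// runtime hands back the translated device pointer, while a use_device_ptr
// item with no matching map gets a zero-size entry instead.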
8656 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8657 SmallVector<DeferredDevicePtrEntryTy, 4>>
8658 DeferredInfo;
8659 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8660
8661 auto &&UseDeviceDataCombinedInfoGen =
8662 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8663 CodeGenFunction &CGF, bool IsDevAddr) {
8664 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8665 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8666 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8667 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8668 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8669 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8670 UseDeviceDataCombinedInfo.Sizes.push_back(
8671 llvm::Constant::getNullValue(CGF.Int64Ty));
8672 UseDeviceDataCombinedInfo.Types.push_back(
8673 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8674 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8675 };
8676
8677 auto &&MapInfoGen =
8678 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8679 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8680 OMPClauseMappableExprCommon::MappableExprComponentListRef
8681 Components,
8682 bool IsImplicit, bool IsDevAddr) {
8683 // We didn't find any match in our map information - generate a zero
8684 // size array section - if the pointer is a struct member we defer
8685 // this action until the whole struct has been processed.
8686 if (isa<MemberExpr>(IE)) {
8687 // Insert the pointer into Info to be processed by
8688 // generateInfoForComponentList. Because it is a member pointer
8689 // without a pointee, no entry will be generated for it, therefore
8690 // we need to generate one after the whole struct has been
8691 // processed. Nonetheless, generateInfoForComponentList must be
8692 // called to take the pointer into account for the calculation of
8693 // the range of the partial struct.
8694 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
8695 /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
8696 IsDevAddr);
8697 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8698 } else {
8699 llvm::Value *Ptr;
8700 if (IsDevAddr) {
8701 if (IE->isGLValue())
8702 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8703 else
8704 Ptr = CGF.EmitScalarExpr(IE);
8705 } else {
8706 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8707 }
8708 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
8709 }
8710 };
8711
8712 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8713 const Expr *IE, bool IsDevAddr) -> bool {
8714 // We potentially have map information for this declaration already.
8715 // Look for the first set of components that refer to it. If found,
8716 // return true.
8717 // If the first component is a member expression, we have to look into
8718 // 'this', which maps to null in the map of map information. Otherwise
8719 // look directly for the information.
8720 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8721 if (It != Info.end()) {
8722 bool Found = false;
8723 for (auto &Data : It->second) {
8724 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8725 return MI.Components.back().getAssociatedDeclaration() == VD;
8726 });
8727 // If we found a map entry, signal that the pointer has to be
8728 // returned and move on to the next declaration. Exclude cases where
8729 // the base pointer is mapped as array subscript, array section or
8730 // array shaping. The base address is passed as a pointer to base in
8731 // this case and cannot be used as a base for use_device_ptr list
8732 // item.
8733 if (CI != Data.end()) {
8734 if (IsDevAddr) {
8735 CI->ForDeviceAddr = IsDevAddr;
8736 CI->ReturnDevicePointer = true;
8737 Found = true;
8738 break;
8739 } else {
8740 auto PrevCI = std::next(CI->Components.rbegin());
8741 const auto *VarD = dyn_cast<VarDecl>(VD);
8742 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8743 isa<MemberExpr>(IE) ||
8744 !VD->getType().getNonReferenceType()->isPointerType() ||
8745 PrevCI == CI->Components.rend() ||
8746 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8747 VarD->hasLocalStorage()) {
8748 CI->ForDeviceAddr = IsDevAddr;
8749 CI->ReturnDevicePointer = true;
8750 Found = true;
8751 break;
8752 }
8753 }
8754 }
8755 }
8756 return Found;
8757 }
8758 return false;
8759 };
8760
8761 // Look at the use_device_ptr clause information and mark the existing map
8762 // entries as such. If there is no map information for an entry in the
8763 // use_device_ptr list, we create one with map type 'alloc' and zero size
8764 // section. It is the user's fault if that was not mapped before. If there is
8765 // no map information and the pointer is a struct member, then we defer the
8766 // emission of that entry until the whole struct has been processed.
8767 for (const auto *Cl : Clauses) {
8768 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8769 if (!C)
8770 continue;
8771 for (const auto L : C->component_lists()) {
8772 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8773 std::get<1>(L);
8774 assert(!Components.empty() &&
8775 "Not expecting empty list of components!");
8776 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8777 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8778 const Expr *IE = Components.back().getAssociatedExpression();
8779 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8780 continue;
8781 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8782 /*IsDevAddr=*/false);
8783 }
8784 }
8785
8786 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8787 for (const auto *Cl : Clauses) {
8788 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8789 if (!C)
8790 continue;
8791 for (const auto L : C->component_lists()) {
8792 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8793 std::get<1>(L);
8794 assert(!std::get<1>(L).empty() &&
8795 "Not expecting empty list of components!");
8796 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8797 if (!Processed.insert(VD).second)
8798 continue;
8799 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8800 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8801 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8802 continue;
8803 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8804 /*IsDevAddr=*/true);
8805 }
8806 }
8807
8808 for (const auto &Data : Info) {
8809 StructRangeInfoTy PartialStruct;
8810 // Current struct information:
8811 MapCombinedInfoTy CurInfo;
8812 // Current struct base information:
8813 MapCombinedInfoTy StructBaseCurInfo;
8814 const Decl *D = Data.first;
8815 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8816 bool HasMapBasePtr = false;
8817 bool HasMapArraySec = false;
8818 if (VD && VD->getType()->isAnyPointerType()) {
8819 for (const auto &M : Data.second) {
8820 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8821 return isa_and_present<DeclRefExpr>(L.VarRef);
8822 });
8823 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8824 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8825 L.VarRef);
8826 });
8827 if (HasMapBasePtr && HasMapArraySec)
8828 break;
8829 }
8830 }
8831 for (const auto &M : Data.second) {
8832 for (const MapInfo &L : M) {
8833 assert(!L.Components.empty() &&
8834 "Not expecting declaration with no component lists.");
8835
8836 // Remember the current base pointer index.
8837 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8838 unsigned StructBasePointersIdx =
8839 StructBaseCurInfo.BasePointers.size();
8840 CurInfo.NonContigInfo.IsNonContiguous =
8841 L.Components.back().isNonContiguous();
8842 generateInfoForComponentList(
8843 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8844 CurInfo, StructBaseCurInfo, PartialStruct,
8845 /*IsFirstComponentList=*/false, L.IsImplicit,
8846 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8847 L.VarRef, /*OverlappedElements*/ {},
8848 HasMapBasePtr && HasMapArraySec);
8849
8850 // If this entry relates to a device pointer, set the relevant
8851 // declaration and add the 'return pointer' flag.
8852 if (L.ReturnDevicePointer) {
8853 // Check whether a value was added to either CurInfo or
8854 // StructBaseCurInfo and error if no value was added to either of
8855 // them:
8856 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8857 StructBasePointersIdx <
8858 StructBaseCurInfo.BasePointers.size()) &&
8859 "Unexpected number of mapped base pointers.");
8860
8861 // Choose a base pointer index which is always valid:
8862 const ValueDecl *RelevantVD =
8863 L.Components.back().getAssociatedDeclaration();
8864 assert(RelevantVD &&
8865 "No relevant declaration related with device pointer??");
8866
8867 // If StructBaseCurInfo has been updated this iteration then work on
8868 // the first new entry added to it i.e. make sure that when multiple
8869 // values are added to any of the lists, the first value added is
8870 // being modified by the assignments below (not the last value
8871 // added).
8872 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8873 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8874 RelevantVD;
8875 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8876 L.ForDeviceAddr ? DeviceInfoTy::Address
8877 : DeviceInfoTy::Pointer;
8878 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8879 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8880 } else {
8881 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8882 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8883 L.ForDeviceAddr ? DeviceInfoTy::Address
8884 : DeviceInfoTy::Pointer;
8885 CurInfo.Types[CurrentBasePointersIdx] |=
8886 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8887 }
8888 }
8889 }
8890 }
8891
8892 // Append any pending zero-length pointers which are struct members and
8893 // used with use_device_ptr or use_device_addr.
8894 auto CI = DeferredInfo.find(Data.first);
8895 if (CI != DeferredInfo.end()) {
8896 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8897 llvm::Value *BasePtr;
8898 llvm::Value *Ptr;
8899 if (L.ForDeviceAddr) {
8900 if (L.IE->isGLValue())
8901 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8902 else
8903 Ptr = this->CGF.EmitScalarExpr(L.IE);
8904 BasePtr = Ptr;
8905 // Entry is RETURN_PARAM. Also, set the placeholder value
8906 // MEMBER_OF=FFFF so that the entry is later updated with the
8907 // correct value of MEMBER_OF.
8908 CurInfo.Types.push_back(
8909 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8910 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8911 } else {
8912 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8913 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8914 L.IE->getExprLoc());
8915 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8916 // placeholder value MEMBER_OF=FFFF so that the entry is later
8917 // updated with the correct value of MEMBER_OF.
8918 CurInfo.Types.push_back(
8919 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8920 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8921 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8922 }
8923 CurInfo.Exprs.push_back(L.VD);
8924 CurInfo.BasePointers.emplace_back(BasePtr);
8925 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8926 CurInfo.DevicePointers.emplace_back(
8927 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8928 CurInfo.Pointers.push_back(Ptr);
8929 CurInfo.Sizes.push_back(
8930 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8931 CurInfo.Mappers.push_back(nullptr);
8932 }
8933 }
8934
8935 // Unify entries in one list making sure the struct mapping precedes the
8936 // individual fields:
8937 MapCombinedInfoTy UnionCurInfo;
8938 UnionCurInfo.append(StructBaseCurInfo);
8939 UnionCurInfo.append(CurInfo);
8940
8941 // If there is an entry in PartialStruct it means we have a struct with
8942 // individual members mapped. Emit an extra combined entry.
8943 if (PartialStruct.Base.isValid()) {
8944 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8945 // Emit a combined entry:
8946 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8947 /*IsMapThis*/ !VD, OMPBuilder, VD);
8948 }
8949
8950 // We need to append the results of this capture to what we already have.
8951 CombinedInfo.append(UnionCurInfo);
8952 }
8953 // Append data for use_device_ptr and use_device_addr clauses.
8954 CombinedInfo.append(UseDeviceDataCombinedInfo);
8955 }
8956
8957public:
8958 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8959 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
8960 // Extract firstprivate clause information.
8961 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8962 for (const auto *D : C->varlist())
8963 FirstPrivateDecls.try_emplace(
8964 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8965 // Extract implicit firstprivates from uses_allocators clauses.
8966 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8967 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8968 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8969 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8970 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8971 /*Implicit=*/true);
8972 else if (const auto *VD = dyn_cast<VarDecl>(
8973 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8974 ->getDecl()))
8975 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8976 }
8977 }
8978 // Extract device pointer clause information.
8979 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8980 for (auto L : C->component_lists())
8981 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8982 // Extract device addr clause information.
8983 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8984 for (auto L : C->component_lists())
8985 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8986 // Extract map information.
8987 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8988 if (C->getMapType() != OMPC_MAP_to)
8989 continue;
8990 for (auto L : C->component_lists()) {
8991 const ValueDecl *VD = std::get<0>(L);
8992 const auto *RD = VD ? VD->getType()
8993 .getCanonicalType()
8994 .getNonReferenceType()
8995 ->getAsCXXRecordDecl()
8996 : nullptr;
8997 if (RD && RD->isLambda())
8998 LambdasMap.try_emplace(std::get<0>(L), C);
8999 }
9000 }
9001 }
9002
9003 /// Constructor for the declare mapper directive.
9004 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
9005 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9006
9007 /// Generate code for the combined entry if we have a partially mapped struct
9008 /// and take care of the mapping flags of the arguments corresponding to
9009 /// individual struct members.
9010 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
9011 MapFlagsArrayTy &CurTypes,
9012 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
9013 llvm::OpenMPIRBuilder &OMPBuilder,
9014 const ValueDecl *VD = nullptr,
9015 unsigned OffsetForMemberOfFlag = 0,
9016 bool NotTargetParams = true) const {
9017 if (CurTypes.size() == 1 &&
9018 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
9019 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
9020 !PartialStruct.IsArraySection)
9021 return;
9022 Address LBAddr = PartialStruct.LowestElem.second;
9023 Address HBAddr = PartialStruct.HighestElem.second;
9024 if (PartialStruct.HasCompleteRecord) {
9025 LBAddr = PartialStruct.LB;
9026 HBAddr = PartialStruct.LB;
9027 }
9028 CombinedInfo.Exprs.push_back(VD);
9029 // Base is the base of the struct
9030 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9031 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9032 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9033 // Pointer is the address of the lowest element
9034 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
9035 const CXXMethodDecl *MD =
9036 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
9037 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
9038 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
9039 // There should not be a mapper for a combined entry.
9040 if (HasBaseClass) {
9041 // OpenMP 5.2 148:21:
9042 // If the target construct is within a class non-static member function,
9043 // and a variable is an accessible data member of the object for which the
9044 // non-static data member function is invoked, the variable is treated as
9045 // if the this[:1] expression had appeared in a map clause with a map-type
9046 // of tofrom.
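// Illustrative example (not from this file): inside a member function
//   void T::f() { #pragma omp target map(a) { /* ... */ } }
// the data member 'a' is mapped as if 'map(tofrom: this[:1])' had been
// written on the directive.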
9047 // Emit this[:1]
9048 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9049 QualType Ty = MD->getFunctionObjectParameterType();
9050 llvm::Value *Size =
9051 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
9052 /*isSigned=*/true);
9053 CombinedInfo.Sizes.push_back(Size);
9054 } else {
9055 CombinedInfo.Pointers.push_back(LB);
9056 // Size is (addr of {highest+1} element) - (addr of lowest element)
9057 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
9058 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
9059 HBAddr.getElementType(), HB, /*Idx0=*/1);
9060 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
9061 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
9062 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
9063 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9064 /*isSigned=*/false);
9065 CombinedInfo.Sizes.push_back(Size);
9066 }
9067 CombinedInfo.Mappers.push_back(nullptr);
9068 // Map type is always TARGET_PARAM when generating info for captures.
9069 CombinedInfo.Types.push_back(
9070 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
9071 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
9072 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
9073 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9074 // If any element has the present modifier, then make sure the runtime
9075 // doesn't attempt to allocate the struct.
9076 if (CurTypes.end() !=
9077 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9078 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9079 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9080 }))
9081 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
9082 // Remove TARGET_PARAM flag from the first element
9083 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9084 // If any element has the ompx_hold modifier, then make sure the runtime
9085 // uses the hold reference count for the struct as a whole so that it won't
9086 // be unmapped by an extra dynamic reference count decrement. Add it to all
9087 // elements as well so the runtime knows which reference count to check
9088 // when determining whether it's time for device-to-host transfers of
9089 // individual elements.
9090 if (CurTypes.end() !=
9091 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9092 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9093 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
9094 })) {
9095 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9096 for (auto &M : CurTypes)
9097 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9098 }
9099
9100 // All other current entries will be MEMBER_OF the combined entry
9101 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9102 // 0xFFFF in the MEMBER_OF field).
9103 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
9104 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
9105 for (auto &M : CurTypes)
9106 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
9107 }
9108
9109 /// Generate all the base pointers, section pointers, sizes, map types, and
9110 /// mappers for the extracted mappable expressions (all included in \a
9111 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9112 /// pair of the relevant declaration and index where it occurs is appended to
9113 /// the device pointers info array.
9114 void generateAllInfo(
9115 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9116 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9117 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9118 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9119 "Expect a executable directive");
9120 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9121 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9122 SkipVarSet);
9123 }
9124
9125 /// Generate all the base pointers, section pointers, sizes, map types, and
9126 /// mappers for the extracted map clauses of user-defined mapper (all included
9127 /// in \a CombinedInfo).
9128 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9129 llvm::OpenMPIRBuilder &OMPBuilder) const {
9130 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9131 "Expect a declare mapper directive");
9132 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
9133 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
9134 OMPBuilder);
9135 }
9136
9137 /// Emit capture info for lambdas for variables captured by reference.
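/// For instance (illustrative): a lambda capturing 'int &x' that is used
/// inside a target region needs the closure field for 'x' emitted as a
/// PTR_AND_OBJ entry so the device-side closure refers to the device copy.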
9138 void generateInfoForLambdaCaptures(
9139 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9140 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9141 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9142 const auto *RD = VDType->getAsCXXRecordDecl();
9143 if (!RD || !RD->isLambda())
9144 return;
9145 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9146 CGF.getContext().getDeclAlign(VD));
9147 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9148 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9149 FieldDecl *ThisCapture = nullptr;
9150 RD->getCaptureFields(Captures, ThisCapture);
9151 if (ThisCapture) {
9152 LValue ThisLVal =
9153 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9154 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9155 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9156 VDLVal.getPointer(CGF));
9157 CombinedInfo.Exprs.push_back(VD);
9158 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9159 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9160 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9161 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9162 CombinedInfo.Sizes.push_back(
9163 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9164 CGF.Int64Ty, /*isSigned=*/true));
9165 CombinedInfo.Types.push_back(
9166 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9167 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9168 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9169 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9170 CombinedInfo.Mappers.push_back(nullptr);
9171 }
9172 for (const LambdaCapture &LC : RD->captures()) {
9173 if (!LC.capturesVariable())
9174 continue;
9175 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
9176 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9177 continue;
9178 auto It = Captures.find(VD);
9179 assert(It != Captures.end() && "Found lambda capture without field.");
9180 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9181 if (LC.getCaptureKind() == LCK_ByRef) {
9182 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9183 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9184 VDLVal.getPointer(CGF));
9185 CombinedInfo.Exprs.push_back(VD);
9186 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9187 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9188 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9189 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9190 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9191 CGF.getTypeSize(
9192 VD->getType().getCanonicalType().getNonReferenceType()),
9193 CGF.Int64Ty, /*isSigned=*/true));
9194 } else {
9195 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9196 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9197 VDLVal.getPointer(CGF));
9198 CombinedInfo.Exprs.push_back(VD);
9199 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9200 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9201 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9202 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9203 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9204 }
9205 CombinedInfo.Types.push_back(
9206 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9207 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9208 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9209 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9210 CombinedInfo.Mappers.push_back(nullptr);
9211 }
9212 }
9213
9214 /// Set correct indices for lambdas captures.
9215 void adjustMemberOfForLambdaCaptures(
9216 llvm::OpenMPIRBuilder &OMPBuilder,
9217 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9218 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9219 MapFlagsArrayTy &Types) const {
9220 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9221 // Set correct member_of idx for all implicit lambda captures.
9222 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9223 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9224 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9225 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9226 continue;
9227 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
9228 assert(BasePtr && "Unable to find base lambda address.");
9229 int TgtIdx = -1;
9230 for (unsigned J = I; J > 0; --J) {
9231 unsigned Idx = J - 1;
9232 if (Pointers[Idx] != BasePtr)
9233 continue;
9234 TgtIdx = Idx;
9235 break;
9236 }
9237 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9238 // All other current entries will be MEMBER_OF the combined entry
9239 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9240 // 0xFFFF in the MEMBER_OF field).
9241 OpenMPOffloadMappingFlags MemberOfFlag =
9242 OMPBuilder.getMemberOfFlag(TgtIdx);
9243 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9244 }
9245 }
9246
9247 /// For a capture that has an associated clause, generate the base pointers,
9248 /// section pointers, sizes, map types, and mappers (all included in
9249 /// \a CurCaptureVarInfo).
9250 void generateInfoForCaptureFromClauseInfo(
9251 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
9252 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9253 unsigned OffsetForMemberOfFlag) const {
9254 assert(!Cap->capturesVariableArrayType() &&
9255 "Not expecting to generate map info for a variable array type!");
9256
9257 // We need to know when we are generating information for the first component.
9258 const ValueDecl *VD = Cap->capturesThis()
9259 ? nullptr
9260 : Cap->getCapturedVar()->getCanonicalDecl();
9261
9262 // For map(to: lambda): skip it here; it is processed in
9263 // generateDefaultMapInfo.
9264 if (LambdasMap.count(VD))
9265 return;
9266
9267 // If this declaration appears in an is_device_ptr clause, we just have to
9268 // pass the pointer by value. If it is a reference to a declaration, we just
9269 // pass its value.
9270 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
9271 CurCaptureVarInfo.Exprs.push_back(VD);
9272 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
9273 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
9274 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
9275 CurCaptureVarInfo.Pointers.push_back(Arg);
9276 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9277 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9278 /*isSigned=*/true));
9279 CurCaptureVarInfo.Types.push_back(
9280 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9281 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9282 CurCaptureVarInfo.Mappers.push_back(nullptr);
9283 return;
9284 }
9285
9286 MapDataArrayTy DeclComponentLists;
9287 // For member fields list in is_device_ptr, store it in
9288 // DeclComponentLists for generating components info.
9289 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9290 auto It = DevPointersMap.find(VD);
9291 if (It != DevPointersMap.end())
9292 for (const auto &MCL : It->second)
9293 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9294 /*IsImplicit=*/true, nullptr,
9295 nullptr);
9296 auto I = HasDevAddrsMap.find(VD);
9297 if (I != HasDevAddrsMap.end())
9298 for (const auto &MCL : I->second)
9299 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9300 /*IsImplicit=*/true, nullptr,
9301 nullptr);
9302 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9303 "Expect a executable directive");
9304 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9305 bool HasMapBasePtr = false;
9306 bool HasMapArraySec = false;
9307 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9308 const auto *EI = C->getVarRefs().begin();
9309 for (const auto L : C->decl_component_lists(VD)) {
9310 const ValueDecl *VDecl, *Mapper;
9311 // The expression is not valid if the mapping is implicit.
9312 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9313 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9314 std::tie(VDecl, Components, Mapper) = L;
9315 assert(VDecl == VD && "We got information for the wrong declaration??");
9316 assert(!Components.empty() &&
9317 "Not expecting declaration with no component lists.");
9318 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
9319 HasMapBasePtr = true;
9320 if (VD && E && VD->getType()->isAnyPointerType() &&
9321 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
9322 HasMapArraySec = true;
9323 DeclComponentLists.emplace_back(Components, C->getMapType(),
9324 C->getMapTypeModifiers(),
9325 C->isImplicit(), Mapper, E);
9326 ++EI;
9327 }
9328 }
9329 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9330 const MapData &RHS) {
9331 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9332 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9333 bool HasPresent =
9334 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9335 bool HasAllocs = MapType == OMPC_MAP_alloc;
9336 MapModifiers = std::get<2>(RHS);
9337 MapType = std::get<1>(LHS);
9338 bool HasPresentR =
9339 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9340 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9341 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9342 });
9343
9344 auto GenerateInfoForComponentLists =
9345 [&](ArrayRef<MapData> DeclComponentLists,
9346 bool IsEligibleForTargetParamFlag) {
9347 MapCombinedInfoTy CurInfoForComponentLists;
9348 StructRangeInfoTy PartialStruct;
9349
9350 if (DeclComponentLists.empty())
9351 return;
9352
9353 generateInfoForCaptureFromComponentLists(
9354 VD, DeclComponentLists, CurInfoForComponentLists, PartialStruct,
9355 IsEligibleForTargetParamFlag,
9356 /*AreBothBasePtrAndPteeMapped=*/HasMapBasePtr && HasMapArraySec);
9357
9358 // If there is an entry in PartialStruct it means we have a
9359 // struct with individual members mapped. Emit an extra combined
9360 // entry.
9361 if (PartialStruct.Base.isValid()) {
9362 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
9363 emitCombinedEntry(
9364 CurCaptureVarInfo, CurInfoForComponentLists.Types,
9365 PartialStruct, Cap->capturesThis(), OMPBuilder, nullptr,
9366 OffsetForMemberOfFlag,
9367 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
9368 }
9369
9370 // Return if we didn't add any entries.
9371 if (CurInfoForComponentLists.BasePointers.empty())
9372 return;
9373
9374 CurCaptureVarInfo.append(CurInfoForComponentLists);
9375 };
9376
9377 GenerateInfoForComponentLists(DeclComponentLists,
9378 /*IsEligibleForTargetParamFlag=*/true);
9379 }
9380
9381 /// Generate the base pointers, section pointers, sizes, map types, and
9382 /// mappers associated to \a DeclComponentLists for a given capture
9383 /// \a VD (all included in \a CurComponentListInfo).
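/// Overlap example (illustrative): with 'map(s) map(s.x)' on one directive,
/// the component list for 's' becomes the base entry and 's.x' is recorded
/// as an overlapped component, so the struct mapping precedes the field.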
9384 void generateInfoForCaptureFromComponentLists(
9385 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
9386 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
9387 bool IsListEligibleForTargetParamFlag,
9388 bool AreBothBasePtrAndPteeMapped = false) const {
9389 // Find overlapping elements (including the offset from the base element).
9390 llvm::SmallDenseMap<
9391 const MapData *,
9392 llvm::SmallVector<
9393 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9394 4>
9395 OverlappedData;
9396 size_t Count = 0;
9397 for (const MapData &L : DeclComponentLists) {
9398 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9399 OpenMPMapClauseKind MapType;
9400 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9401 bool IsImplicit;
9402 const ValueDecl *Mapper;
9403 const Expr *VarRef;
9404 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9405 L;
9406 ++Count;
9407 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
9408 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9409 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9410 VarRef) = L1;
9411 auto CI = Components.rbegin();
9412 auto CE = Components.rend();
9413 auto SI = Components1.rbegin();
9414 auto SE = Components1.rend();
9415 for (; CI != CE && SI != SE; ++CI, ++SI) {
9416 if (CI->getAssociatedExpression()->getStmtClass() !=
9417 SI->getAssociatedExpression()->getStmtClass())
9418 break;
9419 // Are we dealing with different variables/fields?
9420 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9421 break;
9422 }
9423 // Found overlapping if, at least for one component, reached the head
9424 // of the components list.
9425 if (CI == CE || SI == SE) {
9426 // Ignore it if it is the same component.
9427 if (CI == CE && SI == SE)
9428 continue;
9429 const auto It = (SI == SE) ? CI : SI;
9430 // If one component is a pointer and another one is a kind of
9431 // dereference of this pointer (array subscript, section, dereference,
9432 // etc.), it is not an overlap.
9433 // Same, if one component is a base and another component is a
9434 // dereferenced pointer memberexpr with the same base.
9435 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9436 (std::prev(It)->getAssociatedDeclaration() &&
9437 std::prev(It)
9438 ->getAssociatedDeclaration()
9439 ->getType()
9440 ->isPointerType()) ||
9441 (It->getAssociatedDeclaration() &&
9442 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9443 std::next(It) != CE && std::next(It) != SE))
9444 continue;
9445 const MapData &BaseData = CI == CE ? L : L1;
9446 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9447 SI == SE ? Components : Components1;
9448 OverlappedData[&BaseData].push_back(SubData);
9449 }
9450 }
9451 }
9452 // Sort the overlapped elements for each item.
9453 llvm::SmallVector<const FieldDecl *, 4> Layout;
9454 if (!OverlappedData.empty()) {
9455 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9456 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9457 while (BaseType != OrigType) {
9458 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9459 OrigType = BaseType->getPointeeOrArrayElementType();
9460 }
9461
9462 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9463 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9464 else {
9465 const auto *RD = BaseType->getAsRecordDecl();
9466 Layout.append(RD->field_begin(), RD->field_end());
9467 }
9468 }
9469 for (auto &Pair : OverlappedData) {
9470 llvm::stable_sort(
9471 Pair.getSecond(),
9472 [&Layout](
9473 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9474 OMPClauseMappableExprCommon::MappableExprComponentListRef
9475 Second) {
9476 auto CI = First.rbegin();
9477 auto CE = First.rend();
9478 auto SI = Second.rbegin();
9479 auto SE = Second.rend();
9480 for (; CI != CE && SI != SE; ++CI, ++SI) {
9481 if (CI->getAssociatedExpression()->getStmtClass() !=
9482 SI->getAssociatedExpression()->getStmtClass())
9483 break;
9484 // Are we dealing with different variables/fields?
9485 if (CI->getAssociatedDeclaration() !=
9486 SI->getAssociatedDeclaration())
9487 break;
9488 }
9489
9490 // Lists contain the same elements.
9491 if (CI == CE && SI == SE)
9492 return false;
9493
9494 // A list with fewer elements is less than a list with more elements.
9495 if (CI == CE || SI == SE)
9496 return CI == CE;
9497
9498 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9499 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9500 if (FD1->getParent() == FD2->getParent())
9501 return FD1->getFieldIndex() < FD2->getFieldIndex();
9502 const auto *It =
9503 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9504 return FD == FD1 || FD == FD2;
9505 });
9506 return *It == FD1;
9507 });
9508 }
9509
9510 // Associated with a capture, because the mapping flags depend on it.
9511 // Go through all of the elements that have overlapped elements.
9512 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
9513 MapCombinedInfoTy StructBaseCombinedInfo;
9514 for (const auto &Pair : OverlappedData) {
9515 const MapData &L = *Pair.getFirst();
9516 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9517 OpenMPMapClauseKind MapType;
9518 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9519 bool IsImplicit;
9520 const ValueDecl *Mapper;
9521 const Expr *VarRef;
9522 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9523 L;
9524 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9525 OverlappedComponents = Pair.getSecond();
9526 generateInfoForComponentList(
9527 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9528 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag, IsImplicit,
9529 /*GenerateAllInfoForClauses*/ false, Mapper,
9530 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9531 AddTargetParamFlag = false;
9532 }
9533 // Go through other elements without overlapped elements.
9534 for (const MapData &L : DeclComponentLists) {
9535 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9536 OpenMPMapClauseKind MapType;
9537 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9538 bool IsImplicit;
9539 const ValueDecl *Mapper;
9540 const Expr *VarRef;
9541 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9542 L;
9543 auto It = OverlappedData.find(&L);
9544 if (It == OverlappedData.end())
9545 generateInfoForComponentList(
9546 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9547 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag,
9548 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
9549 /*ForDeviceAddr=*/false, VD, VarRef,
9550 /*OverlappedElements*/ {}, AreBothBasePtrAndPteeMapped);
9551 AddTargetParamFlag = false;
9552 }
9553 }
9554
9555 /// Generate the default map information for a given capture \a CI,
9556 /// record field declaration \a RI and captured value \a CV.
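/// In short (paraphrasing the code below, illustrative): a non-pointer
/// capture passed by copy becomes a LITERAL entry with the type's size, a
/// pointer passed by copy becomes a zero-size entry, and a capture by
/// reference is sized from its pointee type.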
9557 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9558 const FieldDecl &RI, llvm::Value *CV,
9559 MapCombinedInfoTy &CombinedInfo) const {
9560 bool IsImplicit = true;
9561 // Do the default mapping.
9562 if (CI.capturesThis()) {
9563 CombinedInfo.Exprs.push_back(nullptr);
9564 CombinedInfo.BasePointers.push_back(CV);
9565 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9566 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9567 CombinedInfo.Pointers.push_back(CV);
9568 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9569 CombinedInfo.Sizes.push_back(
9570 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9571 CGF.Int64Ty, /*isSigned=*/true));
9572 // Default map type.
9573 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
9574 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
9575 } else if (CI.capturesVariableByCopy()) {
9576 const VarDecl *VD = CI.getCapturedVar();
9577 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9578 CombinedInfo.BasePointers.push_back(CV);
9579 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9580 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9581 CombinedInfo.Pointers.push_back(CV);
9582 if (!RI.getType()->isAnyPointerType()) {
9583 // We have to signal to the runtime captures passed by value that are
9584 // not pointers.
9585 CombinedInfo.Types.push_back(
9586 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
9587 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9588 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9589 } else {
9590 // Pointers are implicitly mapped with a zero size and no flags
9591 // (other than first map that is added for all implicit maps).
9592 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
9593 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9594 }
9595 auto I = FirstPrivateDecls.find(VD);
9596 if (I != FirstPrivateDecls.end())
9597 IsImplicit = I->getSecond();
9598 } else {
9599 assert(CI.capturesVariable() && "Expected captured reference.");
9600 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9601 QualType ElementType = PtrTy->getPointeeType();
9602 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9603 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9604 // The default map type for a scalar/complex type is 'to' because by
9605 // default the value doesn't have to be retrieved. For an aggregate
9606 // type, the default is 'tofrom'.
9607 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9608 const VarDecl *VD = CI.getCapturedVar();
9609 auto I = FirstPrivateDecls.find(VD);
9610 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9611 CombinedInfo.BasePointers.push_back(CV);
9612 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9613 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9614 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9615 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9616 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9617 AlignmentSource::Decl));
9618 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
9619 } else {
9620 CombinedInfo.Pointers.push_back(CV);
9621 }
9622 if (I != FirstPrivateDecls.end())
9623 IsImplicit = I->getSecond();
9624 }
9625 // Every default map produces a single argument which is a target parameter.
9626 CombinedInfo.Types.back() |=
9627 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9628
9629 // Add flag stating this is an implicit map.
9630 if (IsImplicit)
9631 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
9632
9633 // No user-defined mapper for default mapping.
9634 CombinedInfo.Mappers.push_back(nullptr);
9635 }
9636};
9637} // anonymous namespace
9638
9639// Try to extract the base declaration from a `this->x` expression if possible.
9640 static const ValueDecl *getDeclFromThisExpr(const Expr *E) {
9641 if (!E)
9642 return nullptr;
9643
9644 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
9645 if (const MemberExpr *ME =
9646 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9647 return ME->getMemberDecl();
9648 return nullptr;
9649}
9650
9651/// Emit a string constant containing the names of the values mapped to the
9652/// offloading runtime library.
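/// E.g., for 'map(arr[0:n])' at file.cpp:12:3 this emits a ';'-delimited
/// source-location string naming the expression (values illustrative).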
9653static llvm::Constant *
9654emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9655 MappableExprsHandler::MappingExprInfo &MapExprs) {
9656
9657 uint32_t SrcLocStrSize;
9658 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9659 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9660
9661 SourceLocation Loc;
9662 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9663 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9664 Loc = VD->getLocation();
9665 else
9666 Loc = MapExprs.getMapExpr()->getExprLoc();
9667 } else {
9668 Loc = MapExprs.getMapDecl()->getLocation();
9669 }
9670
9671 std::string ExprName;
9672 if (MapExprs.getMapExpr()) {
9673 PrintingPolicy P(CGF.getContext().getLangOpts());
9674 llvm::raw_string_ostream OS(ExprName);
9675 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9676 } else {
9677 ExprName = MapExprs.getMapDecl()->getNameAsString();
9678 }
9679
9680 std::string FileName;
9681 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9682 if (auto *DbgInfo = CGF.getDebugInfo())
9683 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
9684 else
9685 FileName = PLoc.getFilename();
9686 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
9687 PLoc.getColumn(), SrcLocStrSize);
9688}
9689/// Emit the arrays used to pass the captures and map information to the
9690/// offloading runtime library. If there is no map or capture information,
9691/// return nullptr by reference.
9692 static void emitOffloadingArraysAndArgs(
9693 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9694 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9695 bool IsNonContiguous = false, bool ForEndCall = false) {
9696 CodeGenModule &CGM = CGF.CGM;
9697
9698 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
9699 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
9700 CGF.AllocaInsertPt->getIterator());
9701 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
9702 CGF.Builder.GetInsertPoint());
9703
9704 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
9705 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
9706 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
9707 }
9708 };
9709
9710 auto CustomMapperCB = [&](unsigned int I) {
9711 llvm::Function *MFunc = nullptr;
9712 if (CombinedInfo.Mappers[I]) {
9713 Info.HasMapper = true;
9714 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9715 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9716 }
9717 return MFunc;
9718 };
9719 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
9720 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
9721 IsNonContiguous, ForEndCall, DeviceAddrCB));
9722}
9723
9724/// Check for inner distribute directive.
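/// E.g. (illustrative): '#pragma omp target' directly enclosing
/// '#pragma omp teams distribute' returns the nested distribute directive;
/// nestings that are not distribute-like return nullptr.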
9725static const OMPExecutableDirective *
9726 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9727 const auto *CS = D.getInnermostCapturedStmt();
9728 const auto *Body =
9729 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9730 const Stmt *ChildStmt =
9731 CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
9732
9733 if (const auto *NestedDir =
9734 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9735 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9736 switch (D.getDirectiveKind()) {
9737 case OMPD_target:
9738 // For now, treat 'target' with nested 'teams loop' as if it's
9739 // distributed (target teams distribute).
9740 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
9741 return NestedDir;
9742 if (DKind == OMPD_teams) {
9743 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9744 /*IgnoreCaptured=*/true);
9745 if (!Body)
9746 return nullptr;
9747 ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
9748 if (const auto *NND =
9749 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9750 DKind = NND->getDirectiveKind();
9751 if (isOpenMPDistributeDirective(DKind))
9752 return NND;
9753 }
9754 }
9755 return nullptr;
9756 case OMPD_target_teams:
9757 if (isOpenMPDistributeDirective(DKind))
9758 return NestedDir;
9759 return nullptr;
9760 case OMPD_target_parallel:
9761 case OMPD_target_simd:
9762 case OMPD_target_parallel_for:
9763 case OMPD_target_parallel_for_simd:
9764 return nullptr;
9765 case OMPD_target_teams_distribute:
9766 case OMPD_target_teams_distribute_simd:
9767 case OMPD_target_teams_distribute_parallel_for:
9768 case OMPD_target_teams_distribute_parallel_for_simd:
9769 case OMPD_parallel:
9770 case OMPD_for:
9771 case OMPD_parallel_for:
9772 case OMPD_parallel_master:
9773 case OMPD_parallel_sections:
9774 case OMPD_for_simd:
9775 case OMPD_parallel_for_simd:
9776 case OMPD_cancel:
9777 case OMPD_cancellation_point:
9778 case OMPD_ordered:
9779 case OMPD_threadprivate:
9780 case OMPD_allocate:
9781 case OMPD_task:
9782 case OMPD_simd:
9783 case OMPD_tile:
9784 case OMPD_unroll:
9785 case OMPD_sections:
9786 case OMPD_section:
9787 case OMPD_single:
9788 case OMPD_master:
9789 case OMPD_critical:
9790 case OMPD_taskyield:
9791 case OMPD_barrier:
9792 case OMPD_taskwait:
9793 case OMPD_taskgroup:
9794 case OMPD_atomic:
9795 case OMPD_flush:
9796 case OMPD_depobj:
9797 case OMPD_scan:
9798 case OMPD_teams:
9799 case OMPD_target_data:
9800 case OMPD_target_exit_data:
9801 case OMPD_target_enter_data:
9802 case OMPD_distribute:
9803 case OMPD_distribute_simd:
9804 case OMPD_distribute_parallel_for:
9805 case OMPD_distribute_parallel_for_simd:
9806 case OMPD_teams_distribute:
9807 case OMPD_teams_distribute_simd:
9808 case OMPD_teams_distribute_parallel_for:
9809 case OMPD_teams_distribute_parallel_for_simd:
9810 case OMPD_target_update:
9811 case OMPD_declare_simd:
9812 case OMPD_declare_variant:
9813 case OMPD_begin_declare_variant:
9814 case OMPD_end_declare_variant:
9815 case OMPD_declare_target:
9816 case OMPD_end_declare_target:
9817 case OMPD_declare_reduction:
9818 case OMPD_declare_mapper:
9819 case OMPD_taskloop:
9820 case OMPD_taskloop_simd:
9821 case OMPD_master_taskloop:
9822 case OMPD_master_taskloop_simd:
9823 case OMPD_parallel_master_taskloop:
9824 case OMPD_parallel_master_taskloop_simd:
9825 case OMPD_requires:
9826 case OMPD_metadirective:
9827 case OMPD_unknown:
9828 default:
9829 llvm_unreachable("Unexpected directive.");
9830 }
9831 }
9832
9833 return nullptr;
9834}
9835
9836/// Emit the user-defined mapper function. The code generation follows the
9837/// pattern in the example below.
9838/// \code
9839/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9840/// void *base, void *begin,
9841/// int64_t size, int64_t type,
9842/// void *name = nullptr) {
9843/// // Allocate space for an array section first or add a base/begin for
9844/// // pointer dereference.
9845/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9846/// !maptype.IsDelete)
9847/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9848/// size*sizeof(Ty), clearToFromMember(type));
9849/// // Map members.
9850/// for (unsigned i = 0; i < size; i++) {
9851/// // For each component specified by this mapper:
9852/// for (auto c : begin[i]->all_components) {
9853/// if (c.hasMapper())
9854/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9855/// c.arg_type, c.arg_name);
9856/// else
9857/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9858/// c.arg_begin, c.arg_size, c.arg_type,
9859/// c.arg_name);
9860/// }
9861/// }
9862/// // Delete the array section.
9863/// if (size > 1 && maptype.IsDelete)
9864/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9865/// size*sizeof(Ty), clearToFromMember(type));
9866/// }
9867/// \endcode
9868 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9869 CodeGenFunction *CGF) {
9870 if (UDMMap.count(D) > 0)
9871 return;
9872 ASTContext &C = CGM.getContext();
9873 QualType Ty = D->getType();
9874 auto *MapperVarDecl =
9875 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9876 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9877 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9878
9879 CodeGenFunction MapperCGF(CGM);
9880 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9881 auto PrivatizeAndGenMapInfoCB =
9882 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
9883 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
9884 MapperCGF.Builder.restoreIP(CodeGenIP);
9885
9886 // Privatize the declared variable of mapper to be the current array
9887 // element.
9888 Address PtrCurrent(
9889 PtrPHI, ElemTy,
9890 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
9891 .getAlignment()
9892 .alignmentOfArrayElement(ElementSize));
9893 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9894 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9895 (void)Scope.Privatize();
9896
9897 // Get map clause information.
9898 MappableExprsHandler MEHandler(*D, MapperCGF);
9899 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
9900
9901 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9902 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
9903 };
9904 if (CGM.getCodeGenOpts().getDebugInfo() !=
9905 llvm::codegenoptions::NoDebugInfo) {
9906 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9907 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9908 FillInfoMap);
9909 }
9910
9911 return CombinedInfo;
9912 };
9913
9914 auto CustomMapperCB = [&](unsigned I) {
9915 llvm::Function *MapperFunc = nullptr;
9916 if (CombinedInfo.Mappers[I]) {
9917 // Call the corresponding mapper function.
9918 MapperFunc = getOrCreateUserDefinedMapperFunc(
9919 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9920 assert(MapperFunc && "Expected a valid mapper function to be available.");
9921 }
9922 return MapperFunc;
9923 };
9924
9925 SmallString<64> TyStr;
9926 llvm::raw_svector_ostream Out(TyStr);
9927 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9928 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9929
9930 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
9931 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
9932 UDMMap.try_emplace(D, NewFn);
9933 if (CGF)
9934 FunctionUDMMap[CGF->CurFn].push_back(D);
9935}
9936
9937 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9938 const OMPDeclareMapperDecl *D) {
9939 auto I = UDMMap.find(D);
9940 if (I != UDMMap.end())
9941 return I->second;
9942 emitUserDefinedMapper(D);
9943 return UDMMap.lookup(D);
9944}
9945
9946 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9947 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9948 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9949 const OMPLoopDirective &D)>
9950 SizeEmitter) {
9951 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9952 const OMPExecutableDirective *TD = &D;
9953 // Get nested teams distribute kind directive, if any. For now, treat
9954 // 'target_teams_loop' as if it's really a target_teams_distribute.
9955 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9956 Kind != OMPD_target_teams_loop)
9957 TD = getNestedDistributeDirective(CGM.getContext(), D);
9958 if (!TD)
9959 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9960
9961 const auto *LD = cast<OMPLoopDirective>(TD);
9962 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9963 return NumIterations;
9964 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9965}
9966
9967static void
9968emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9969 const OMPExecutableDirective &D,
9970 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9971 bool RequiresOuterTask, const CapturedStmt &CS,
9972 bool OffloadingMandatory, CodeGenFunction &CGF) {
9973 if (OffloadingMandatory) {
9974 CGF.Builder.CreateUnreachable();
9975 } else {
9976 if (RequiresOuterTask) {
9977 CapturedVars.clear();
9978 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9979 }
9980 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9981 CapturedVars);
9982 }
9983}
9984
9985static llvm::Value *emitDeviceID(
9986 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9987 CodeGenFunction &CGF) {
9988 // Emit device ID if any.
9989 llvm::Value *DeviceID;
9990 if (Device.getPointer()) {
9991 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9992 Device.getInt() == OMPC_DEVICE_device_num) &&
9993 "Expected device_num modifier.");
9994 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9995 DeviceID =
9996 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9997 } else {
9998 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9999 }
10000 return DeviceID;
10001}
10002
10003static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
10004 CodeGenFunction &CGF) {
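// E.g. (illustrative): 'ompx_dyn_cgroup_mem(1024)' makes this return the
// i32 value 1024; without the clause it returns 0.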
10005 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
10006
10007 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
10008 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
10009 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
10010 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
10011 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
10012 /*isSigned=*/false);
10013 }
10014 return DynCGroupMem;
10015}
10017 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10018 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10019 llvm::OpenMPIRBuilder &OMPBuilder,
10020 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
10021 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10022
10023 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10024 auto RI = CS.getCapturedRecordDecl()->field_begin();
10025 auto *CV = CapturedVars.begin();
10026 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10027 CE = CS.capture_end();
10028 CI != CE; ++CI, ++RI, ++CV) {
10029 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10030
10031 // VLA sizes are passed to the outlined region by copy and do not have map
10032 // information associated.
10033 if (CI->capturesVariableArrayType()) {
10034 CurInfo.Exprs.push_back(nullptr);
10035 CurInfo.BasePointers.push_back(*CV);
10036 CurInfo.DevicePtrDecls.push_back(nullptr);
10037 CurInfo.DevicePointers.push_back(
10038 MappableExprsHandler::DeviceInfoTy::None);
10039 CurInfo.Pointers.push_back(*CV);
10040 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10041 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10042 // Copy to the device as an argument. No need to retrieve it.
10043 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10044 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10045 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10046 CurInfo.Mappers.push_back(nullptr);
10047 } else {
10048 // If we have any information in the map clause, we use it, otherwise we
10049 // just do a default mapping.
10050 MEHandler.generateInfoForCaptureFromClauseInfo(
10051 CI, *CV, CurInfo, OMPBuilder,
10052 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
10053
10054 if (!CI->capturesThis())
10055 MappedVarSet.insert(CI->getCapturedVar());
10056 else
10057 MappedVarSet.insert(nullptr);
10058
10059 if (CurInfo.BasePointers.empty())
10060 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10061
10062 // Generate correct mapping for variables captured by reference in
10063 // lambdas.
10064 if (CI->capturesVariable())
10065 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10066 CurInfo, LambdaPointers);
10067 }
10068 // We expect to have at least an element of information for this capture.
10069 assert(!CurInfo.BasePointers.empty() &&
10070 "Non-existing map pointer for capture!");
10071 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10072 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10073 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10074 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10075 "Inconsistent map information sizes!");
10076
10077 // We need to append the results of this capture to what we already have.
10078 CombinedInfo.append(CurInfo);
10079 }
10080 // Adjust MEMBER_OF flags for the lambdas captures.
10081 MEHandler.adjustMemberOfForLambdaCaptures(
10082 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
10083 CombinedInfo.Pointers, CombinedInfo.Types);
10084}
10085static void
10086genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10087 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10088 llvm::OpenMPIRBuilder &OMPBuilder,
10089 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10090 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10091
10092 CodeGenModule &CGM = CGF.CGM;
10093 // Map any list items in a map clause that were not captures because they
10094 // weren't referenced within the construct.
10095 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
10096
10097 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10098 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10099 };
10100 if (CGM.getCodeGenOpts().getDebugInfo() !=
10101 llvm::codegenoptions::NoDebugInfo) {
10102 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10103 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10104 FillInfoMap);
10105 }
10106}
10107
10108 static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
10109 const CapturedStmt &CS,
10110 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10111 llvm::OpenMPIRBuilder &OMPBuilder,
10112 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10113 // Get mappable expression information.
10114 MappableExprsHandler MEHandler(D, CGF);
10115 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10116
10117 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10118 MappedVarSet, CombinedInfo);
10119 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
10120}
10121
10122template <typename ClauseTy>
10123static void
10124 emitClauseForBareTargetDirective(CodeGenFunction &CGF,
10125 const OMPExecutableDirective &D,
10126 llvm::SmallVectorImpl<llvm::Value *> &Values) {
10127 const auto *C = D.getSingleClause<ClauseTy>();
10128 assert(!C->varlist_empty() &&
10129 "ompx_bare requires explicit num_teams and thread_limit");
10131 for (auto *E : C->varlist()) {
10132 llvm::Value *V = CGF.EmitScalarExpr(E);
10133 Values.push_back(
10134 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
10135 }
10136}
10137
10138 static void emitTargetCallKernelLaunch(
10139 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10140 const OMPExecutableDirective &D,
10141 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10142 const CapturedStmt &CS, bool OffloadingMandatory,
10143 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10144 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10145 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10146 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10147 const OMPLoopDirective &D)>
10148 SizeEmitter,
10149 CodeGenFunction &CGF, CodeGenModule &CGM) {
10150 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10151
10152 // Fill up the arrays with all the captured variables.
10153 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10154 CGOpenMPRuntime::TargetDataInfo Info;
10155 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10156
10157 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10158 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10159
10160 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10161 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10162 CGF.VoidPtrTy, CGM.getPointerAlign());
10163 InputInfo.PointersArray =
10164 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10165 InputInfo.SizesArray =
10166 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10167 InputInfo.MappersArray =
10168 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10169 MapTypesArray = Info.RTArgs.MapTypesArray;
10170 MapNamesArray = Info.RTArgs.MapNamesArray;
10171
10172 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10173 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10174 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10175 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10176 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10177
10178 if (IsReverseOffloading) {
10179 // Reverse offloading is not supported, so just execute on the host.
10180 // FIXME: This fallback solution is incorrect since it ignores the
10181 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10182 // assert here and ensure SEMA emits an error.
10183 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10184 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10185 return;
10186 }
10187
10188 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10189 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10190
10191 llvm::Value *BasePointersArray =
10192 InputInfo.BasePointersArray.emitRawPointer(CGF);
10193 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10194 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10195 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10196
10197 auto &&EmitTargetCallFallbackCB =
10198 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10199 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10200 -> llvm::OpenMPIRBuilder::InsertPointTy {
10201 CGF.Builder.restoreIP(IP);
10202 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10203 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10204 return CGF.Builder.saveIP();
10205 };
10206
10207 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10208 SmallVector<llvm::Value *, 3> NumTeams;
10209 SmallVector<llvm::Value *, 3> NumThreads;
10210 if (IsBare) {
10211 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
10212 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
10213 NumThreads);
10214 } else {
10215 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10216 NumThreads.push_back(
10217 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10218 }
10219
10220 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10221 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
10222 llvm::Value *NumIterations =
10223 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10224 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
10225 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10226 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10227
10228 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10229 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10230 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10231
10232 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
10233 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
10234 DynCGGroupMem, HasNoWait);
10235
10236 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10237 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
10238 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
10239 RTLoc, AllocaIP));
10240 CGF.Builder.restoreIP(AfterIP);
10241 };
10242
10243 if (RequiresOuterTask)
10244 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10245 else
10246 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10247}
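// Illustrative sketch (names are placeholders, not the exact emitted IR):
// for a construct such as "#pragma omp target map(tofrom: x)", the ThenGen
// above lowers to a guarded kernel launch of roughly this shape:
//
//   ret = __tgt_target_kernel(loc, device_id, num_teams, num_threads,
//                             outlined_fn_id, &kernel_args);
//   if (ret != 0)
//     <host fallback>   // via EmitTargetCallFallbackCB above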
10248
10249static void
10250emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10251 const OMPExecutableDirective &D,
10252 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10253 bool RequiresOuterTask, const CapturedStmt &CS,
10254 bool OffloadingMandatory, CodeGenFunction &CGF) {
10255
10256 // Notify that the host version must be executed.
10257 auto &&ElseGen =
10258 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10259 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10260 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10261 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10262 };
10263
10264 if (RequiresOuterTask) {
10265 CodeGenFunction::OMPTargetDataInfo InputInfo;
10266 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10267 } else {
10268 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10269 }
10270}
10271
10272 void CGOpenMPRuntime::emitTargetCall(
10273 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10274 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10275 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10276 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10277 const OMPLoopDirective &D)>
10278 SizeEmitter) {
10279 if (!CGF.HaveInsertPoint())
10280 return;
10281
10282 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
10283 CGM.getLangOpts().OpenMPOffloadMandatory;
10284
10285 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10286
10287 const bool RequiresOuterTask =
10288 D.hasClausesOfKind<OMPDependClause>() ||
10289 D.hasClausesOfKind<OMPNowaitClause>() ||
10290 D.hasClausesOfKind<OMPInReductionClause>() ||
10291 (CGM.getLangOpts().OpenMP >= 51 &&
10292 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
10293 D.hasClausesOfKind<OMPThreadLimitClause>());
10294 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10295 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10296 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10297 PrePostActionTy &) {
10298 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10299 };
10300 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10301
10302 CodeGenFunction::OMPTargetDataInfo InputInfo;
10303 llvm::Value *MapTypesArray = nullptr;
10304 llvm::Value *MapNamesArray = nullptr;
10305
10306 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
10307 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10308 OutlinedFnID, &InputInfo, &MapTypesArray,
10309 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
10310 PrePostActionTy &) {
10311 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
10312 RequiresOuterTask, CS, OffloadingMandatory,
10313 Device, OutlinedFnID, InputInfo, MapTypesArray,
10314 MapNamesArray, SizeEmitter, CGF, CGM);
10315 };
10316
10317 auto &&TargetElseGen =
10318 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10319 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10320 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
10321 CS, OffloadingMandatory, CGF);
10322 };
10323
10324 // If we have a target function ID it means that we need to support
10325 // offloading, otherwise, just execute on the host. We need to execute on host
10326 // regardless of the conditional in the if clause if, e.g., the user does not
10327 // specify any target triples.
10328 if (OutlinedFnID) {
10329 if (IfCond) {
10330 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10331 } else {
10332 RegionCodeGenTy ThenRCG(TargetThenGen);
10333 ThenRCG(CGF);
10334 }
10335 } else {
10336 RegionCodeGenTy ElseRCG(TargetElseGen);
10337 ElseRCG(CGF);
10338 }
10339}
10340
10341 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10342 StringRef ParentName) {
10343 if (!S)
10344 return;
10345
10346 // Codegen OMP target directives that offload compute to the device.
10347 bool RequiresDeviceCodegen =
10348 isa<OMPExecutableDirective>(S) &&
10349 isOpenMPTargetExecutionDirective(
10350 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10351
10352 if (RequiresDeviceCodegen) {
10353 const auto &E = *cast<OMPExecutableDirective>(S);
10354
10355 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
10356 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
10357
10358 // Is this a target region that should not be emitted as an entry point?
10359 // If so, just signal that we are done with this target region.
10360 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
10361 return;
10362
10363 switch (E.getDirectiveKind()) {
10364 case OMPD_target:
10365 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10366 cast<OMPTargetDirective>(E));
10367 break;
10368 case OMPD_target_parallel:
10369 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10370 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10371 break;
10372 case OMPD_target_teams:
10373 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10374 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10375 break;
10376 case OMPD_target_teams_distribute:
10377 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10378 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10379 break;
10380 case OMPD_target_teams_distribute_simd:
10381 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10382 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10383 break;
10384 case OMPD_target_parallel_for:
10385 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10386 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10387 break;
10388 case OMPD_target_parallel_for_simd:
10389 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10390 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10391 break;
10392 case OMPD_target_simd:
10393 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10394 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10395 break;
10396 case OMPD_target_teams_distribute_parallel_for:
10397 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10398 CGM, ParentName,
10399 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10400 break;
10401 case OMPD_target_teams_distribute_parallel_for_simd:
10402 CodeGenFunction::
10403 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10404 CGM, ParentName,
10405 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10406 break;
10407 case OMPD_target_teams_loop:
10408 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
10409 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
10410 break;
10411 case OMPD_target_parallel_loop:
10412 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
10413 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
10414 break;
10415 case OMPD_parallel:
10416 case OMPD_for:
10417 case OMPD_parallel_for:
10418 case OMPD_parallel_master:
10419 case OMPD_parallel_sections:
10420 case OMPD_for_simd:
10421 case OMPD_parallel_for_simd:
10422 case OMPD_cancel:
10423 case OMPD_cancellation_point:
10424 case OMPD_ordered:
10425 case OMPD_threadprivate:
10426 case OMPD_allocate:
10427 case OMPD_task:
10428 case OMPD_simd:
10429 case OMPD_tile:
10430 case OMPD_unroll:
10431 case OMPD_sections:
10432 case OMPD_section:
10433 case OMPD_single:
10434 case OMPD_master:
10435 case OMPD_critical:
10436 case OMPD_taskyield:
10437 case OMPD_barrier:
10438 case OMPD_taskwait:
10439 case OMPD_taskgroup:
10440 case OMPD_atomic:
10441 case OMPD_flush:
10442 case OMPD_depobj:
10443 case OMPD_scan:
10444 case OMPD_teams:
10445 case OMPD_target_data:
10446 case OMPD_target_exit_data:
10447 case OMPD_target_enter_data:
10448 case OMPD_distribute:
10449 case OMPD_distribute_simd:
10450 case OMPD_distribute_parallel_for:
10451 case OMPD_distribute_parallel_for_simd:
10452 case OMPD_teams_distribute:
10453 case OMPD_teams_distribute_simd:
10454 case OMPD_teams_distribute_parallel_for:
10455 case OMPD_teams_distribute_parallel_for_simd:
10456 case OMPD_target_update:
10457 case OMPD_declare_simd:
10458 case OMPD_declare_variant:
10459 case OMPD_begin_declare_variant:
10460 case OMPD_end_declare_variant:
10461 case OMPD_declare_target:
10462 case OMPD_end_declare_target:
10463 case OMPD_declare_reduction:
10464 case OMPD_declare_mapper:
10465 case OMPD_taskloop:
10466 case OMPD_taskloop_simd:
10467 case OMPD_master_taskloop:
10468 case OMPD_master_taskloop_simd:
10469 case OMPD_parallel_master_taskloop:
10470 case OMPD_parallel_master_taskloop_simd:
10471 case OMPD_requires:
10472 case OMPD_metadirective:
10473 case OMPD_unknown:
10474 default:
10475 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10476 }
10477 return;
10478 }
10479
10480 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10481 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10482 return;
10483
10484 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10485 return;
10486 }
10487
10488 // If this is a lambda function, look into its body.
10489 if (const auto *L = dyn_cast<LambdaExpr>(S))
10490 S = L->getBody();
10491
10492 // Keep looking for target regions recursively.
10493 for (const Stmt *II : S->children())
10494 scanForTargetRegionsFunctions(II, ParentName);
10495}
10496
10497static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10498 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10499 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10500 if (!DevTy)
10501 return false;
10502 // Do not emit device_type(nohost) functions for the host.
10503 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10504 return true;
10505 // Do not emit device_type(host) functions for the device.
10506 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10507 return true;
10508 return false;
10509}
10510
10511 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10512 // If emitting code for the host, we do not process FD here. Instead we do
10513 // the normal code generation.
10514 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
10515 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10516 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10517 CGM.getLangOpts().OpenMPIsTargetDevice))
10518 return true;
10519 return false;
10520 }
10521
10522 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10523 // Try to detect target regions in the function.
10524 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10525 StringRef Name = CGM.getMangledName(GD);
10526 scanForTargetRegionsFunctions(FD->getBody(), Name);
10527 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10528 CGM.getLangOpts().OpenMPIsTargetDevice))
10529 return true;
10530 }
10531
10532 // Do not emit the function if it is not marked as declare target.
10533 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10534 AlreadyEmittedTargetDecls.count(VD) == 0;
10535}
10536
10537 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10538 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10539 CGM.getLangOpts().OpenMPIsTargetDevice))
10540 return true;
10541
10542 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
10543 return false;
10544
10545 // Check if there are Ctors/Dtors in this declaration and look for target
10546 // regions in it. We use the complete variant to produce the kernel name
10547 // mangling.
10548 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10549 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10550 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10551 StringRef ParentName =
10552 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10553 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10554 }
10555 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10556 StringRef ParentName =
10557 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10558 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10559 }
10560 }
10561
10562 // Do not emit the variable if it is not marked as declare target.
10563 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10564 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10565 cast<VarDecl>(GD.getDecl()));
10566 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10567 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10568 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10569 HasRequiresUnifiedSharedMemory)) {
10570 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10571 return true;
10572 }
10573 return false;
10574}
10575
10576 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10577 llvm::Constant *Addr) {
10578 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10579 !CGM.getLangOpts().OpenMPIsTargetDevice)
10580 return;
10581
10582 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10583 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10584
10585 // If this is an 'extern' declaration we defer to the canonical definition and
10586 // do not emit an offloading entry.
10587 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10588 VD->hasExternalStorage())
10589 return;
10590
10591 if (!Res) {
10592 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10593 // Register non-target variables being emitted in device code (debug info
10594 // may cause this).
10595 StringRef VarName = CGM.getMangledName(VD);
10596 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10597 }
10598 return;
10599 }
10600
10601 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10602 auto LinkageForVariable = [&VD, this]() {
10603 return CGM.getLLVMLinkageVarDefinition(VD);
10604 };
10605
10606 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10607 OMPBuilder.registerTargetGlobalVariable(
10608 convertCaptureClause(VD), convertDeviceClause(VD),
10609 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10610 VD->isExternallyVisible(),
10611 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10612 VD->getCanonicalDecl()->getBeginLoc()),
10613 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10614 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10615 CGM.getTypes().ConvertTypeForMem(
10616 CGM.getContext().getPointerType(VD->getType())),
10617 Addr);
10618
10619 for (auto *ref : GeneratedRefs)
10620 CGM.addCompilerUsedGlobal(ref);
10621}
10622
10623 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10624 if (isa<FunctionDecl>(GD.getDecl()) ||
10625 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10626 return emitTargetFunctions(GD);
10627
10628 return emitTargetGlobalVariable(GD);
10629}
10630
10631 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10632 for (const VarDecl *VD : DeferredGlobalVariables) {
10633 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10634 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10635 if (!Res)
10636 continue;
10637 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10638 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10639 !HasRequiresUnifiedSharedMemory) {
10640 CGM.EmitGlobal(VD);
10641 } else {
10642 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10643 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10644 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10645 HasRequiresUnifiedSharedMemory)) &&
10646 "Expected link clause or to clause with unified memory.");
10647 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10648 }
10649 }
10650}
10651
10652 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10653 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10654 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10655 " Expected target-based directive.");
10656}
10657
10658 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10659 for (const OMPClause *Clause : D->clauselists()) {
10660 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10661 HasRequiresUnifiedSharedMemory = true;
10662 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10663 } else if (const auto *AC =
10664 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10665 switch (AC->getAtomicDefaultMemOrderKind()) {
10666 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10667 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10668 break;
10669 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10670 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10671 break;
10672 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10673 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10674 break;
10675 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10676 break;
10677 }
10678 }
10679 }
10680}
10681
10682llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10683 return RequiresAtomicOrdering;
10684}
10685
10686 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10687 LangAS &AS) {
10688 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10689 return false;
10690 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10691 switch (A->getAllocatorType()) {
10692 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10693 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10694 // Not supported, fallback to the default mem space.
10695 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10696 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10697 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10698 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10699 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10700 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10701 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10702 AS = LangAS::Default;
10703 return true;
10704 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10705 llvm_unreachable("Expected predefined allocator for the variables with the "
10706 "static storage.");
10707 }
10708 return false;
10709}
10710
10711 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10712 return HasRequiresUnifiedSharedMemory;
10713 }
10714
10715 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10716 CodeGenModule &CGM)
10717 : CGM(CGM) {
10718 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10719 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10720 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10721 }
10722}
10723
10724 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10725 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10726 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10727}
10728
10729 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10730 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10731 return true;
10732
10733 const auto *D = cast<FunctionDecl>(GD.getDecl());
10734 // Do not emit the function if it is marked as declare target, as it was
10735 // already emitted.
10736 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10737 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10738 if (auto *F = dyn_cast_or_null<llvm::Function>(
10739 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10740 return !F->isDeclaration();
10741 return false;
10742 }
10743 return true;
10744 }
10745
10746 return !AlreadyEmittedTargetDecls.insert(D).second;
10747}
10748
10749 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10750 const OMPExecutableDirective &D,
10751 SourceLocation Loc,
10752 llvm::Function *OutlinedFn,
10753 ArrayRef<llvm::Value *> CapturedVars) {
10754 if (!CGF.HaveInsertPoint())
10755 return;
10756
10757 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10758 CodeGenFunction::RunCleanupsScope Scope(CGF);
10759
10760 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10761 llvm::Value *Args[] = {
10762 RTLoc,
10763 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10764 OutlinedFn};
10765 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10766 RealArgs.append(std::begin(Args), std::end(Args));
10767 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10768
10769 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10770 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10771 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10772}
10773
10774 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10775 const Expr *NumTeams,
10776 const Expr *ThreadLimit,
10777 SourceLocation Loc) {
10778 if (!CGF.HaveInsertPoint())
10779 return;
10780
10781 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10782
10783 llvm::Value *NumTeamsVal =
10784 NumTeams
10785 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10786 CGF.CGM.Int32Ty, /* isSigned = */ true)
10787 : CGF.Builder.getInt32(0);
10788
10789 llvm::Value *ThreadLimitVal =
10790 ThreadLimit
10791 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10792 CGF.CGM.Int32Ty, /* isSigned = */ true)
10793 : CGF.Builder.getInt32(0);
10794
10795 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10796 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10797 ThreadLimitVal};
10798 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10799 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10800 PushNumTeamsArgs);
10801}
10802
10803 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10804 const Expr *ThreadLimit,
10805 SourceLocation Loc) {
10806 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10807 llvm::Value *ThreadLimitVal =
10808 ThreadLimit
10809 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10810 CGF.CGM.Int32Ty, /* isSigned = */ true)
10811 : CGF.Builder.getInt32(0);
10812
10813 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10814 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10815 ThreadLimitVal};
10816 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10817 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10818 ThreadLimitArgs);
10819}
10820
10821 void CGOpenMPRuntime::emitTargetDataCalls(
10822 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10823 const Expr *Device, const RegionCodeGenTy &CodeGen,
10824 CGOpenMPRuntime::TargetDataInfo &Info) {
10825 if (!CGF.HaveInsertPoint())
10826 return;
10827
10828 // Action used to replace the default codegen action and turn privatization
10829 // off.
10830 PrePostActionTy NoPrivAction;
10831
10832 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10833
10834 llvm::Value *IfCondVal = nullptr;
10835 if (IfCond)
10836 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10837
10838 // Emit device ID if any.
10839 llvm::Value *DeviceID = nullptr;
10840 if (Device) {
10841 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10842 CGF.Int64Ty, /*isSigned=*/true);
10843 } else {
10844 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10845 }
10846
10847 // Fill up the arrays with all the mapped variables.
10848 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10849 auto GenMapInfoCB =
10850 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10851 CGF.Builder.restoreIP(CodeGenIP);
10852 // Get map clause information.
10853 MappableExprsHandler MEHandler(D, CGF);
10854 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10855
10856 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10857 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10858 };
10859 if (CGM.getCodeGenOpts().getDebugInfo() !=
10860 llvm::codegenoptions::NoDebugInfo) {
10861 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10862 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10863 FillInfoMap);
10864 }
10865
10866 return CombinedInfo;
10867 };
10868 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10869 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10870 CGF.Builder.restoreIP(CodeGenIP);
10871 switch (BodyGenType) {
10872 case BodyGenTy::Priv:
10873 if (!Info.CaptureDeviceAddrMap.empty())
10874 CodeGen(CGF);
10875 break;
10876 case BodyGenTy::DupNoPriv:
10877 if (!Info.CaptureDeviceAddrMap.empty()) {
10878 CodeGen.setAction(NoPrivAction);
10879 CodeGen(CGF);
10880 }
10881 break;
10882 case BodyGenTy::NoPriv:
10883 if (Info.CaptureDeviceAddrMap.empty()) {
10884 CodeGen.setAction(NoPrivAction);
10885 CodeGen(CGF);
10886 }
10887 break;
10888 }
10889 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10890 CGF.Builder.GetInsertPoint());
10891 };
10892
10893 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10894 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10895 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10896 }
10897 };
10898
10899 auto CustomMapperCB = [&](unsigned int I) {
10900 llvm::Function *MFunc = nullptr;
10901 if (CombinedInfo.Mappers[I]) {
10902 Info.HasMapper = true;
10903 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10904 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10905 }
10906 return MFunc;
10907 };
10908
10909 // Source location for the ident struct
10910 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10911
10912 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10913 CGF.AllocaInsertPt->getIterator());
10914 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10915 CGF.Builder.GetInsertPoint());
10916 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10917 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10918 cantFail(OMPBuilder.createTargetData(
10919 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10920 CustomMapperCB,
10921 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
10922 CGF.Builder.restoreIP(AfterIP);
10923}
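// As an illustration, a region such as
//   #pragma omp target data map(tofrom: a) use_device_ptr(p)
// exercises the callbacks above: GenMapInfoCB collects the map entries for
// 'a' and 'p', DeviceAddrCB records the device address substituted for 'p',
// and BodyCB emits the region body between the begin/end runtime calls that
// createTargetData materializes.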
10924
10925 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10926 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10927 const Expr *Device) {
10928 if (!CGF.HaveInsertPoint())
10929 return;
10930
10931 assert((isa<OMPTargetEnterDataDirective>(D) ||
10932 isa<OMPTargetExitDataDirective>(D) ||
10933 isa<OMPTargetUpdateDirective>(D)) &&
10934 "Expecting either target enter, exit data, or update directives.");
10935
10937 llvm::Value *MapTypesArray = nullptr;
10938 llvm::Value *MapNamesArray = nullptr;
10939 // Generate the code for the opening of the data environment.
10940 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10941 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10942 // Emit device ID if any.
10943 llvm::Value *DeviceID = nullptr;
10944 if (Device) {
10945 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10946 CGF.Int64Ty, /*isSigned=*/true);
10947 } else {
10948 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10949 }
10950
10951 // Emit the number of elements in the offloading arrays.
10952 llvm::Constant *PointerNum =
10953 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10954
10955 // Source location for the ident struct
10956 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10957
10958 SmallVector<llvm::Value *, 13> OffloadingArgs(
10959 {RTLoc, DeviceID, PointerNum,
10960 InputInfo.BasePointersArray.emitRawPointer(CGF),
10961 InputInfo.PointersArray.emitRawPointer(CGF),
10962 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10963 InputInfo.MappersArray.emitRawPointer(CGF)});
10964
10965 // Select the right runtime function call for each standalone
10966 // directive.
10967 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10968 RuntimeFunction RTLFn;
10969 switch (D.getDirectiveKind()) {
10970 case OMPD_target_enter_data:
10971 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10972 : OMPRTL___tgt_target_data_begin_mapper;
10973 break;
10974 case OMPD_target_exit_data:
10975 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10976 : OMPRTL___tgt_target_data_end_mapper;
10977 break;
10978 case OMPD_target_update:
10979 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10980 : OMPRTL___tgt_target_data_update_mapper;
10981 break;
10982 case OMPD_parallel:
10983 case OMPD_for:
10984 case OMPD_parallel_for:
10985 case OMPD_parallel_master:
10986 case OMPD_parallel_sections:
10987 case OMPD_for_simd:
10988 case OMPD_parallel_for_simd:
10989 case OMPD_cancel:
10990 case OMPD_cancellation_point:
10991 case OMPD_ordered:
10992 case OMPD_threadprivate:
10993 case OMPD_allocate:
10994 case OMPD_task:
10995 case OMPD_simd:
10996 case OMPD_tile:
10997 case OMPD_unroll:
10998 case OMPD_sections:
10999 case OMPD_section:
11000 case OMPD_single:
11001 case OMPD_master:
11002 case OMPD_critical:
11003 case OMPD_taskyield:
11004 case OMPD_barrier:
11005 case OMPD_taskwait:
11006 case OMPD_taskgroup:
11007 case OMPD_atomic:
11008 case OMPD_flush:
11009 case OMPD_depobj:
11010 case OMPD_scan:
11011 case OMPD_teams:
11012 case OMPD_target_data:
11013 case OMPD_distribute:
11014 case OMPD_distribute_simd:
11015 case OMPD_distribute_parallel_for:
11016 case OMPD_distribute_parallel_for_simd:
11017 case OMPD_teams_distribute:
11018 case OMPD_teams_distribute_simd:
11019 case OMPD_teams_distribute_parallel_for:
11020 case OMPD_teams_distribute_parallel_for_simd:
11021 case OMPD_declare_simd:
11022 case OMPD_declare_variant:
11023 case OMPD_begin_declare_variant:
11024 case OMPD_end_declare_variant:
11025 case OMPD_declare_target:
11026 case OMPD_end_declare_target:
11027 case OMPD_declare_reduction:
11028 case OMPD_declare_mapper:
11029 case OMPD_taskloop:
11030 case OMPD_taskloop_simd:
11031 case OMPD_master_taskloop:
11032 case OMPD_master_taskloop_simd:
11033 case OMPD_parallel_master_taskloop:
11034 case OMPD_parallel_master_taskloop_simd:
11035 case OMPD_target:
11036 case OMPD_target_simd:
11037 case OMPD_target_teams_distribute:
11038 case OMPD_target_teams_distribute_simd:
11039 case OMPD_target_teams_distribute_parallel_for:
11040 case OMPD_target_teams_distribute_parallel_for_simd:
11041 case OMPD_target_teams:
11042 case OMPD_target_parallel:
11043 case OMPD_target_parallel_for:
11044 case OMPD_target_parallel_for_simd:
11045 case OMPD_requires:
11046 case OMPD_metadirective:
11047 case OMPD_unknown:
11048 default:
11049 llvm_unreachable("Unexpected standalone target data directive.");
11050 break;
11051 }
11052 if (HasNowait) {
11053 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11054 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11055 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11056 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11057 }
11058 CGF.EmitRuntimeCall(
11059 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11060 OffloadingArgs);
11061 };
11062
11063 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11064 &MapNamesArray](CodeGenFunction &CGF,
11065 PrePostActionTy &) {
11066 // Fill up the arrays with all the mapped variables.
11067 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11068 CGOpenMPRuntime::TargetDataInfo Info;
11069 MappableExprsHandler MEHandler(D, CGF);
11070 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11071 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11072 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11073
11074 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11075 D.hasClausesOfKind<OMPNowaitClause>();
11076
11077 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11078 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11079 CGF.VoidPtrTy, CGM.getPointerAlign());
11080 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11081 CGM.getPointerAlign());
11082 InputInfo.SizesArray =
11083 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11084 InputInfo.MappersArray =
11085 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11086 MapTypesArray = Info.RTArgs.MapTypesArray;
11087 MapNamesArray = Info.RTArgs.MapNamesArray;
11088 if (RequiresOuterTask)
11089 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11090 else
11091 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11092 };
11093
11094 if (IfCond) {
11095 emitIfClause(CGF, IfCond, TargetThenGen,
11096 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11097 } else {
11098 RegionCodeGenTy ThenRCG(TargetThenGen);
11099 ThenRCG(CGF);
11100 }
11101}
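// For example, "#pragma omp target enter data map(to: x)" selects
// __tgt_target_data_begin_mapper above, "target exit data" selects
// __tgt_target_data_end_mapper, and "target update" selects
// __tgt_target_data_update_mapper; a nowait clause switches each to its
// _nowait_mapper variant and appends the four extra dependence arguments.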
11102
11103namespace {
11104 /// Kind of parameter in a function with 'declare simd' directive.
11105enum ParamKindTy {
11106 Linear,
11107 LinearRef,
11108 LinearUVal,
11109 LinearVal,
11110 Uniform,
11111 Vector,
11112};
11113/// Attribute set of the parameter.
11114struct ParamAttrTy {
11115 ParamKindTy Kind = Vector;
11116 llvm::APSInt StrideOrArg;
11117 llvm::APSInt Alignment;
11118 bool HasVarStride = false;
11119};
11120} // namespace
11121
11122static unsigned evaluateCDTSize(const FunctionDecl *FD,
11123 ArrayRef<ParamAttrTy> ParamAttrs) {
11124 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11125 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11126 // of that clause. The VLEN value must be a power of 2.
11127 // Otherwise the notion of the function's "characteristic data type" (CDT)
11128 // is used to compute the vector length.
11129 // CDT is defined in the following order:
11130 // a) For non-void function, the CDT is the return type.
11131 // b) If the function has any non-uniform, non-linear parameters, then the
11132 // CDT is the type of the first such parameter.
11133 // c) If the CDT determined by a) or b) above is struct, union, or class
11134 // type which is pass-by-value (except for the type that maps to the
11135 // built-in complex data type), the characteristic data type is int.
11136 // d) If none of the above three cases is applicable, the CDT is int.
11137 // The VLEN is then determined based on the CDT and the size of vector
11138 // register of that ISA for which current vector version is generated. The
11139 // VLEN is computed using the formula below:
11140 // VLEN = sizeof(vector_register) / sizeof(CDT),
11141 // where the vector register size is specified in section 3.2.1 "Registers
11142 // and the Stack Frame" of the original AMD64 ABI document.
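// For example (illustrative): for "double foo(double x)" vectorized for a
// 256-bit register ISA, the CDT is double (64 bits) and VLEN = 256 / 64 = 4.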
11143 QualType RetType = FD->getReturnType();
11144 if (RetType.isNull())
11145 return 0;
11146 ASTContext &C = FD->getASTContext();
11147 QualType CDT;
11148 if (!RetType.isNull() && !RetType->isVoidType()) {
11149 CDT = RetType;
11150 } else {
11151 unsigned Offset = 0;
11152 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11153 if (ParamAttrs[Offset].Kind == Vector)
11154 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
11155 ++Offset;
11156 }
11157 if (CDT.isNull()) {
11158 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11159 if (ParamAttrs[I + Offset].Kind == Vector) {
11160 CDT = FD->getParamDecl(I)->getType();
11161 break;
11162 }
11163 }
11164 }
11165 }
11166 if (CDT.isNull())
11167 CDT = C.IntTy;
11168 CDT = CDT->getCanonicalTypeUnqualified();
11169 if (CDT->isRecordType() || CDT->isUnionType())
11170 CDT = C.IntTy;
11171 return C.getTypeSize(CDT);
11172}
11173
11174/// Mangle the parameter part of the vector function name according to
11175/// their OpenMP classification. The mangling function is defined in
11176/// section 4.5 of the AAVFABI(2021Q1).
11177static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11178 SmallString<256> Buffer;
11179 llvm::raw_svector_ostream Out(Buffer);
11180 for (const auto &ParamAttr : ParamAttrs) {
11181 switch (ParamAttr.Kind) {
11182 case Linear:
11183 Out << 'l';
11184 break;
11185 case LinearRef:
11186 Out << 'R';
11187 break;
11188 case LinearUVal:
11189 Out << 'U';
11190 break;
11191 case LinearVal:
11192 Out << 'L';
11193 break;
11194 case Uniform:
11195 Out << 'u';
11196 break;
11197 case Vector:
11198 Out << 'v';
11199 break;
11200 }
11201 if (ParamAttr.HasVarStride)
11202 Out << "s" << ParamAttr.StrideOrArg;
11203 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11204 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11205 // Don't print the step value if it is not present or if it is
11206 // equal to 1.
11207 if (ParamAttr.StrideOrArg < 0)
11208 Out << 'n' << -ParamAttr.StrideOrArg;
11209 else if (ParamAttr.StrideOrArg != 1)
11210 Out << ParamAttr.StrideOrArg;
11211 }
11212
11213 if (!!ParamAttr.Alignment)
11214 Out << 'a' << ParamAttr.Alignment;
11215 }
11216
11217 return std::string(Out.str());
11218}
11219
11220static void
11221emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11222 const llvm::APSInt &VLENVal,
11223 ArrayRef<ParamAttrTy> ParamAttrs,
11224 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11225 struct ISADataTy {
11226 char ISA;
11227 unsigned VecRegSize;
11228 };
11229 ISADataTy ISAData[] = {
11230 {
11231 'b', 128
11232 }, // SSE
11233 {
11234 'c', 256
11235 }, // AVX
11236 {
11237 'd', 256
11238 }, // AVX2
11239 {
11240 'e', 512
11241 }, // AVX512
11242 };
11243 llvm::SmallVector<char, 2> Masked;
11244 switch (State) {
11245 case OMPDeclareSimdDeclAttr::BS_Undefined:
11246 Masked.push_back('N');
11247 Masked.push_back('M');
11248 break;
11249 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11250 Masked.push_back('N');
11251 break;
11252 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11253 Masked.push_back('M');
11254 break;
11255 }
11256 for (char Mask : Masked) {
11257 for (const ISADataTy &Data : ISAData) {
11258 SmallString<256> Buffer;
11259 llvm::raw_svector_ostream Out(Buffer);
11260 Out << "_ZGV" << Data.ISA << Mask;
11261 if (!VLENVal) {
11262 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11263 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11264 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11265 } else {
11266 Out << VLENVal;
11267 }
11268 Out << mangleVectorParameters(ParamAttrs);
11269 Out << '_' << Fn->getName();
11270 Fn->addFnAttr(Out.str());
11271 }
11272 }
11273}
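// Illustrative example: "#pragma omp declare simd simdlen(4)" with no
// [not]inbranch clause on a C function "double foo(double)" adds, among
// others, the attributes "_ZGVbN4v_foo" (SSE, unmasked) and "_ZGVbM4v_foo"
// (SSE, masked), and likewise for the 'c' (AVX), 'd' (AVX2) and 'e' (AVX512)
// ISA letters.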
11274
11275 // These are the functions needed to mangle the names of the
11276 // vector functions generated by the compiler, according to the rules
11277// defined in the "Vector Function ABI specifications for AArch64",
11278// available at
11279// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11280
11281/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11282static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11283 QT = QT.getCanonicalType();
11284
11285 if (QT->isVoidType())
11286 return false;
11287
11288 if (Kind == ParamKindTy::Uniform)
11289 return false;
11290
11291 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
11292 return false;
11293
11294 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11295 !QT->isReferenceType())
11296 return false;
11297
11298 return true;
11299}
11300
11301/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11302 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11303 QT = QT.getCanonicalType();
11304 unsigned Size = C.getTypeSize(QT);
11305
11306 // Only scalar and complex types at most 16 bytes wide set PBV to true.
11307 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11308 return false;
11309
11310 if (QT->isFloatingType())
11311 return true;
11312
11313 if (QT->isIntegerType())
11314 return true;
11315
11316 if (QT->isPointerType())
11317 return true;
11318
11319 // TODO: Add support for complex types (section 3.1.2, item 2).
11320
11321 return false;
11322}
11323
11324/// Computes the lane size (LS) of a return type or of an input parameter,
11325/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11326/// TODO: Add support for references, section 3.2.1, item 1.
11327static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11328 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11329 QualType PTy = QT.getCanonicalType()->getPointeeType();
11330 if (getAArch64PBV(PTy, C))
11331 return C.getTypeSize(PTy);
11332 }
11333 if (getAArch64PBV(QT, C))
11334 return C.getTypeSize(QT);
11335
11336 return C.getTypeSize(C.getUIntPtrType());
11337}
11338
11339// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11340// signature of the scalar function, as defined in 3.2.2 of the
11341// AAVFABI.
11342static std::tuple<unsigned, unsigned, bool>
11343 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11344 QualType RetType = FD->getReturnType().getCanonicalType();
11345
11346 ASTContext &C = FD->getASTContext();
11347
11348 bool OutputBecomesInput = false;
11349
11351 if (!RetType->isVoidType()) {
11352 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11353 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11354 OutputBecomesInput = true;
11355 }
11356 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11357 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11358 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11359 }
11360
11361 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11362 // The LS of a function parameter / return value can only be a power
11363 // of 2, starting from 8 bits, up to 128.
11364 assert(llvm::all_of(Sizes,
11365 [](unsigned Size) {
11366 return Size == 8 || Size == 16 || Size == 32 ||
11367 Size == 64 || Size == 128;
11368 }) &&
11369 "Invalid size");
11370
11371 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
11372 OutputBecomesInput);
11373}
11374
11375// Function used to add the attribute. The parameter `VLEN` is
11376// templated to allow the use of "x" when targeting scalable functions
11377// for SVE.
11378template <typename T>
11379static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11380 char ISA, StringRef ParSeq,
11381 StringRef MangledName, bool OutputBecomesInput,
11382 llvm::Function *Fn) {
11383 SmallString<256> Buffer;
11384 llvm::raw_svector_ostream Out(Buffer);
11385 Out << Prefix << ISA << LMask << VLEN;
11386 if (OutputBecomesInput)
11387 Out << "v";
11388 Out << ParSeq << "_" << MangledName;
11389 Fn->addFnAttr(Out.str());
11390}
11391
11392// Helper function to generate the Advanced SIMD names depending on
11393// the value of the NDS when simdlen is not present.
11394static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11395 StringRef Prefix, char ISA,
11396 StringRef ParSeq, StringRef MangledName,
11397 bool OutputBecomesInput,
11398 llvm::Function *Fn) {
11399 switch (NDS) {
11400 case 8:
11401 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11402 OutputBecomesInput, Fn);
11403 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11404 OutputBecomesInput, Fn);
11405 break;
11406 case 16:
11407 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11408 OutputBecomesInput, Fn);
11409 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11410 OutputBecomesInput, Fn);
11411 break;
11412 case 32:
11413 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11414 OutputBecomesInput, Fn);
11415 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11416 OutputBecomesInput, Fn);
11417 break;
11418 case 64:
11419 case 128:
11420 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11421 OutputBecomesInput, Fn);
11422 break;
11423 default:
11424 llvm_unreachable("Scalar type is too wide.");
11425 }
11426}
11427
11428/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11429 static void emitAArch64DeclareSimdFunction(
11430 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11431 ArrayRef<ParamAttrTy> ParamAttrs,
11432 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11433 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11434
11435 // Get basic data for building the vector signature.
11436 const auto Data = getNDSWDS(FD, ParamAttrs);
11437 const unsigned NDS = std::get<0>(Data);
11438 const unsigned WDS = std::get<1>(Data);
11439 const bool OutputBecomesInput = std::get<2>(Data);
11440
11441 // Check the values provided via `simdlen` by the user.
11442 // 1. A `simdlen(1)` doesn't produce vector signatures,
11443 if (UserVLEN == 1) {
11444 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11445 DiagnosticsEngine::Warning,
11446 "The clause simdlen(1) has no effect when targeting aarch64.");
11447 CGM.getDiags().Report(SLoc, DiagID);
11448 return;
11449 }
11450
11451 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11452 // Advanced SIMD output.
11453 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11454 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11455 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11456 "power of 2 when targeting Advanced SIMD.");
11457 CGM.getDiags().Report(SLoc, DiagID);
11458 return;
11459 }
11460
11461 // 3. Section 3.4.1. SVE fixed length must obey the architectural
11462 // limits.
11463 if (ISA == 's' && UserVLEN != 0) {
11464 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11465 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11466 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11467 "lanes in the architectural constraints "
11468 "for SVE (min is 128-bit, max is "
11469 "2048-bit, by steps of 128-bit)");
11470 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11471 return;
11472 }
11473 }
11474
11475 // Sort out parameter sequence.
11476 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11477 StringRef Prefix = "_ZGV";
11478 // Generate simdlen from user input (if any).
11479 if (UserVLEN) {
11480 if (ISA == 's') {
11481 // SVE generates only a masked function.
11482 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11483 OutputBecomesInput, Fn);
11484 } else {
11485 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11486 // Advanced SIMD generates one or two functions, depending on
11487 // the `[not]inbranch` clause.
11488 switch (State) {
11489 case OMPDeclareSimdDeclAttr::BS_Undefined:
11490 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11491 OutputBecomesInput, Fn);
11492 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11493 OutputBecomesInput, Fn);
11494 break;
11495 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11496 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11497 OutputBecomesInput, Fn);
11498 break;
11499 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11500 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11501 OutputBecomesInput, Fn);
11502 break;
11503 }
11504 }
11505 } else {
11506 // If no user simdlen is provided, follow the AAVFABI rules for
11507 // generating the vector length.
11508 if (ISA == 's') {
11509 // SVE, section 3.4.1, item 1.
11510 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11511 OutputBecomesInput, Fn);
11512 } else {
11513 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11514 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11515 // two vector names depending on the use of the clause
11516 // `[not]inbranch`.
11517 switch (State) {
11518 case OMPDeclareSimdDeclAttr::BS_Undefined:
11519 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11520 OutputBecomesInput, Fn);
11521 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11522 OutputBecomesInput, Fn);
11523 break;
11524 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11525 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11526 OutputBecomesInput, Fn);
11527 break;
11528 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11529 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11530 OutputBecomesInput, Fn);
11531 break;
11532 }
11533 }
11534 }
11535}
11536
11537 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11538 llvm::Function *Fn) {
11539 ASTContext &C = CGM.getContext();
11540 FD = FD->getMostRecentDecl();
11541 while (FD) {
11542 // Map params to their positions in function decl.
11543 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11544 if (isa<CXXMethodDecl>(FD))
11545 ParamPositions.try_emplace(FD, 0);
11546 unsigned ParamPos = ParamPositions.size();
11547 for (const ParmVarDecl *P : FD->parameters()) {
11548 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11549 ++ParamPos;
11550 }
11551 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11552 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11553 // Mark uniform parameters.
11554 for (const Expr *E : Attr->uniforms()) {
11555 E = E->IgnoreParenImpCasts();
11556 unsigned Pos;
11557 if (isa<CXXThisExpr>(E)) {
11558 Pos = ParamPositions[FD];
11559 } else {
11560 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11561 ->getCanonicalDecl();
11562 auto It = ParamPositions.find(PVD);
11563 assert(It != ParamPositions.end() && "Function parameter not found");
11564 Pos = It->second;
11565 }
11566 ParamAttrs[Pos].Kind = Uniform;
11567 }
11568 // Get alignment info.
11569 auto *NI = Attr->alignments_begin();
11570 for (const Expr *E : Attr->aligneds()) {
11571 E = E->IgnoreParenImpCasts();
11572 unsigned Pos;
11573 QualType ParmTy;
11574 if (isa<CXXThisExpr>(E)) {
11575 Pos = ParamPositions[FD];
11576 ParmTy = E->getType();
11577 } else {
11578 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11579 ->getCanonicalDecl();
11580 auto It = ParamPositions.find(PVD);
11581 assert(It != ParamPositions.end() && "Function parameter not found");
11582 Pos = It->second;
11583 ParmTy = PVD->getType();
11584 }
11585 ParamAttrs[Pos].Alignment =
11586 (*NI)
11587 ? (*NI)->EvaluateKnownConstInt(C)
11588 : llvm::APSInt::getUnsigned(
11589 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11590 .getQuantity());
11591 ++NI;
11592 }
11593 // Mark linear parameters.
11594 auto *SI = Attr->steps_begin();
11595 auto *MI = Attr->modifiers_begin();
11596 for (const Expr *E : Attr->linears()) {
11597 E = E->IgnoreParenImpCasts();
11598 unsigned Pos;
11599 bool IsReferenceType = false;
11600 // Rescaling factor needed to compute the linear parameter
11601 // value in the mangled name.
11602 unsigned PtrRescalingFactor = 1;
11603 if (isa<CXXThisExpr>(E)) {
11604 Pos = ParamPositions[FD];
11605 auto *P = cast<PointerType>(E->getType());
11606 PtrRescalingFactor = CGM.getContext()
11607 .getTypeSizeInChars(P->getPointeeType())
11608 .getQuantity();
11609 } else {
11610 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11611 ->getCanonicalDecl();
11612 auto It = ParamPositions.find(PVD);
11613 assert(It != ParamPositions.end() && "Function parameter not found");
11614 Pos = It->second;
11615 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11616 PtrRescalingFactor = CGM.getContext()
11617 .getTypeSizeInChars(P->getPointeeType())
11618 .getQuantity();
11619 else if (PVD->getType()->isReferenceType()) {
11620 IsReferenceType = true;
11621 PtrRescalingFactor =
11622 CGM.getContext()
11623 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11624 .getQuantity();
11625 }
11626 }
11627 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11628 if (*MI == OMPC_LINEAR_ref)
11629 ParamAttr.Kind = LinearRef;
11630 else if (*MI == OMPC_LINEAR_uval)
11631 ParamAttr.Kind = LinearUVal;
11632 else if (IsReferenceType)
11633 ParamAttr.Kind = LinearVal;
11634 else
11635 ParamAttr.Kind = Linear;
11636 // Assuming a stride of 1, for `linear` without modifiers.
11637 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11638 if (*SI) {
11639 Expr::EvalResult Result;
11640 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11641 if (const auto *DRE =
11642 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11643 if (const auto *StridePVD =
11644 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11645 ParamAttr.HasVarStride = true;
11646 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11647 assert(It != ParamPositions.end() &&
11648 "Function parameter not found");
11649 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11650 }
11651 }
11652 } else {
11653 ParamAttr.StrideOrArg = Result.Val.getInt();
11654 }
11655 }
11656 // If we are using a linear clause on a pointer, we need to
11657 // rescale the value of linear_step with the byte size of the
11658 // pointee type.
11659 if (!ParamAttr.HasVarStride &&
11660 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11661 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11662 ++SI;
11663 ++MI;
11664 }
11665 llvm::APSInt VLENVal;
11666 SourceLocation ExprLoc;
11667 const Expr *VLENExpr = Attr->getSimdlen();
11668 if (VLENExpr) {
11669 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11670 ExprLoc = VLENExpr->getExprLoc();
11671 }
11672 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11673 if (CGM.getTriple().isX86()) {
11674 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11675 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11676 unsigned VLEN = VLENVal.getExtValue();
11677 StringRef MangledName = Fn->getName();
11678 if (CGM.getTarget().hasFeature("sve"))
11679 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11680 MangledName, 's', 128, Fn, ExprLoc);
11681 else if (CGM.getTarget().hasFeature("neon"))
11682 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11683 MangledName, 'n', 128, Fn, ExprLoc);
11684 }
11685 }
11686 FD = FD->getPreviousDecl();
11687 }
11688}
11689
11690namespace {
11691/// Cleanup action for doacross support.
11692class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11693public:
11694 static const int DoacrossFinArgs = 2;
11695
11696private:
11697 llvm::FunctionCallee RTLFn;
11698 llvm::Value *Args[DoacrossFinArgs];
11699
11700public:
11701 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11702 ArrayRef<llvm::Value *> CallArgs)
11703 : RTLFn(RTLFn) {
11704 assert(CallArgs.size() == DoacrossFinArgs);
11705 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11706 }
11707 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11708 if (!CGF.HaveInsertPoint())
11709 return;
11710 CGF.EmitRuntimeCall(RTLFn, Args);
11711 }
11712};
11713} // namespace
11714
11715 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11716 const OMPLoopDirective &D,
11717 ArrayRef<Expr *> NumIterations) {
11718 if (!CGF.HaveInsertPoint())
11719 return;
11720
11721 ASTContext &C = CGM.getContext();
11722 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11723 RecordDecl *RD;
11724 if (KmpDimTy.isNull()) {
11725 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11726 // kmp_int64 lo; // lower
11727 // kmp_int64 up; // upper
11728 // kmp_int64 st; // stride
11729 // };
11730 RD = C.buildImplicitRecord("kmp_dim");
11731 RD->startDefinition();
11732 addFieldToRecordDecl(C, RD, Int64Ty);
11733 addFieldToRecordDecl(C, RD, Int64Ty);
11734 addFieldToRecordDecl(C, RD, Int64Ty);
11735 RD->completeDefinition();
11736 KmpDimTy = C.getCanonicalTagType(RD);
11737 } else {
11738 RD = KmpDimTy->castAsRecordDecl();
11739 }
11740 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11741 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11742 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
11743
11744 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11745 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11746 enum { LowerFD = 0, UpperFD, StrideFD };
11747 // Fill dims with data.
11748 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11749 LValue DimsLVal = CGF.MakeAddrLValue(
11750 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11751 // dims.upper = num_iterations;
11752 LValue UpperLVal = CGF.EmitLValueForField(
11753 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11754 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11755 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11756 Int64Ty, NumIterations[I]->getExprLoc());
11757 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11758 // dims.stride = 1;
11759 LValue StrideLVal = CGF.EmitLValueForField(
11760 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11761 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11762 StrideLVal);
11763 }
11764
11765 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11766 // kmp_int32 num_dims, struct kmp_dim * dims);
11767 llvm::Value *Args[] = {
11768 emitUpdateLocation(CGF, D.getBeginLoc()),
11769 getThreadID(CGF, D.getBeginLoc()),
11770 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11771 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11772 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11773 CGM.VoidPtrTy)};
11774
11775 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11776 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11777 CGF.EmitRuntimeCall(RTLFn, Args);
11778 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11779 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11780 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11781 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11782 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11783 llvm::ArrayRef(FiniArgs));
11784}
11785
11786template <typename T>
11787 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11788 const T *C, llvm::Value *ULoc,
11789 llvm::Value *ThreadID) {
11790 QualType Int64Ty =
11791 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11792 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11793 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11794 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11795 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11796 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11797 const Expr *CounterVal = C->getLoopData(I);
11798 assert(CounterVal);
11799 llvm::Value *CntVal = CGF.EmitScalarConversion(
11800 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11801 CounterVal->getExprLoc());
11802 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11803 /*Volatile=*/false, Int64Ty);
11804 }
11805 llvm::Value *Args[] = {
11806 ULoc, ThreadID,
11807 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11808 llvm::FunctionCallee RTLFn;
11809 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11810 OMPDoacrossKind<T> ODK;
11811 if (ODK.isSource(C)) {
11812 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11813 OMPRTL___kmpc_doacross_post);
11814 } else {
11815 assert(ODK.isSink(C) && "Expect sink modifier.");
11816 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11817 OMPRTL___kmpc_doacross_wait);
11818 }
11819 CGF.EmitRuntimeCall(RTLFn, Args);
11820}
11821
11822 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11823 const OMPDependClause *C) {
11824 return EmitDoacrossOrdered<OMPDependClause>(
11825 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11826 getThreadID(CGF, C->getBeginLoc()));
11827}
11828
11829 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11830 const OMPDoacrossClause *C) {
11831 return EmitDoacrossOrdered<OMPDoacrossClause>(
11832 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11833 getThreadID(CGF, C->getBeginLoc()));
11834}
11835
11836 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11837 llvm::FunctionCallee Callee,
11838 ArrayRef<llvm::Value *> Args) const {
11839 assert(Loc.isValid() && "Outlined function call location must be valid.");
11840 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11841
11842 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11843 if (Fn->doesNotThrow()) {
11844 CGF.EmitNounwindRuntimeCall(Fn, Args);
11845 return;
11846 }
11847 }
11848 CGF.EmitRuntimeCall(Callee, Args);
11849}
11850
11851 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11852 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11853 ArrayRef<llvm::Value *> Args) const {
11854 emitCall(CGF, Loc, OutlinedFn, Args);
11855}
11856
11857 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11858 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11859 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11860 HasEmittedDeclareTargetRegion = true;
11861}
11862
11863 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11864 const VarDecl *NativeParam,
11865 const VarDecl *TargetParam) const {
11866 return CGF.GetAddrOfLocalVar(NativeParam);
11867}
11868
11869/// Return allocator value from expression, or return a null allocator (default
11870/// when no allocator specified).
11871static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11872 const Expr *Allocator) {
11873 llvm::Value *AllocVal;
11874 if (Allocator) {
11875 AllocVal = CGF.EmitScalarExpr(Allocator);
11876 // According to the standard, the original allocator type is an enum
11877 // (integer). Convert it to a pointer type, if required.
11878 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11879 CGF.getContext().VoidPtrTy,
11880 Allocator->getExprLoc());
11881 } else {
11882 // If no allocator specified, it defaults to the null allocator.
11883 AllocVal = llvm::Constant::getNullValue(
11884 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11885 }
11886 return AllocVal;
11887}
11888
11889/// Return the alignment from an allocate directive if present.
11890static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11891 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11892
11893 if (!AllocateAlignment)
11894 return nullptr;
11895
11896 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11897}
11898
11899Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11900 const VarDecl *VD) {
11901 if (!VD)
11902 return Address::invalid();
11903 Address UntiedAddr = Address::invalid();
11904 Address UntiedRealAddr = Address::invalid();
11905 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11906 if (It != FunctionToUntiedTaskStackMap.end()) {
11907 const UntiedLocalVarsAddressesMap &UntiedData =
11908 UntiedLocalVarsStack[It->second];
11909 auto I = UntiedData.find(VD);
11910 if (I != UntiedData.end()) {
11911 UntiedAddr = I->second.first;
11912 UntiedRealAddr = I->second.second;
11913 }
11914 }
11915 const VarDecl *CVD = VD->getCanonicalDecl();
11916 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11917 // Use the default allocation.
11918 if (!isAllocatableDecl(VD))
11919 return UntiedAddr;
11920 llvm::Value *Size;
11921 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11922 if (CVD->getType()->isVariablyModifiedType()) {
11923 Size = CGF.getTypeSize(CVD->getType());
11924 // Align the size: ((size + align - 1) / align) * align
11925 Size = CGF.Builder.CreateNUWAdd(
11926 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11927 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11928 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11929 } else {
11930 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11931 Size = CGM.getSize(Sz.alignTo(Align));
11932 }
11933 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11934 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11935 const Expr *Allocator = AA->getAllocator();
11936 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11937 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11938 SmallVector<llvm::Value *, 4> Args;
11939 Args.push_back(ThreadID);
11940 if (Alignment)
11941 Args.push_back(Alignment);
11942 Args.push_back(Size);
11943 Args.push_back(AllocVal);
11944 llvm::omp::RuntimeFunction FnID =
11945 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11946 llvm::Value *Addr = CGF.EmitRuntimeCall(
11947 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11948 getName({CVD->getName(), ".void.addr"}));
11949 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11950 CGM.getModule(), OMPRTL___kmpc_free);
11951 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11952 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11953 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11954 if (UntiedAddr.isValid())
11955 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11956
11957 // Cleanup action for allocate support.
11958 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11959 llvm::FunctionCallee RTLFn;
11960 SourceLocation::UIntTy LocEncoding;
11961 Address Addr;
11962 const Expr *AllocExpr;
11963
11964 public:
11965 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11966 SourceLocation::UIntTy LocEncoding, Address Addr,
11967 const Expr *AllocExpr)
11968 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11969 AllocExpr(AllocExpr) {}
11970 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11971 if (!CGF.HaveInsertPoint())
11972 return;
11973 llvm::Value *Args[3];
11974 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11975 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11976 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11977 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11978 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11979 Args[2] = AllocVal;
11980 CGF.EmitRuntimeCall(RTLFn, Args);
11981 }
11982 };
11983 Address VDAddr =
11984 UntiedRealAddr.isValid()
11985 ? UntiedRealAddr
11986 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11987 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11988 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11989 VDAddr, Allocator);
11990 if (UntiedRealAddr.isValid())
11991 if (auto *Region =
11992 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11993 Region->emitUntiedSwitch(CGF);
11994 return VDAddr;
11995 }
11996 return UntiedAddr;
11997}
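// Illustrative sketch (not part of this file): a declaration that takes the
// __kmpc_alloc/__kmpc_aligned_alloc path above; the allocator handle and the
// align clause are assumptions for the example, not requirements.
//
//   omp_allocator_handle_t ah = omp_high_bw_mem_alloc;
//   int buf[n]; // variably modified, so the size is computed at run time
//   #pragma omp allocate(buf) allocator(ah) align(64)
//
// The size rounding for the variably-modified case, worked for size = 10 and
// align = 8: (10 + 8 - 1) / 8 = 2 units, 2 * 8 = 16 bytes requested.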
11998
11999bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12000 const VarDecl *VD) const {
12001 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12002 if (It == FunctionToUntiedTaskStackMap.end())
12003 return false;
12004 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12005}
12006
12007CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12008 CodeGenModule &CGM, const OMPLoopDirective &S)
12009 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12010 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12011 if (!NeedToPush)
12012 return;
12013 NontemporalDeclsSet &DS =
12014 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12015 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12016 for (const Stmt *Ref : C->private_refs()) {
12017 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12018 const ValueDecl *VD;
12019 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12020 VD = DRE->getDecl();
12021 } else {
12022 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12023 assert((ME->isImplicitCXXThis() ||
12024 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12025 "Expected member of current class.");
12026 VD = ME->getMemberDecl();
12027 }
12028 DS.insert(VD);
12029 }
12030 }
12031}
12032
12033CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12034 if (!NeedToPush)
12035 return;
12036 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12037}
12038
12039CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12040 CodeGenFunction &CGF,
12041 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12042 std::pair<Address, Address>> &LocalVars)
12043 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12044 if (!NeedToPush)
12045 return;
12046 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12047 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12048 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12049}
12050
12051CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12052 if (!NeedToPush)
12053 return;
12054 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12055}
12056
12057bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12058 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12059
12060 return llvm::any_of(
12061 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12062 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12063}
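// Illustrative sketch (not part of this file): the clause that populates
// NontemporalDeclsStack for the duration of the simd body.
//
//   #pragma omp simd nontemporal(a, b)
//   for (int i = 0; i < N; ++i)
//     a[i] = b[i] + 1; // accesses to a and b get !nontemporal metadata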
12064
12065void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12066 const OMPExecutableDirective &S,
12067 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12068 const {
12069 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12070 // Vars in target/task regions must be excluded completely.
12071 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12072 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12073 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12074 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12075 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12076 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12077 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12078 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12079 }
12080 }
12081 // Exclude vars in private clauses.
12082 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12083 for (const Expr *Ref : C->varlist()) {
12084 if (!Ref->getType()->isScalarType())
12085 continue;
12086 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12087 if (!DRE)
12088 continue;
12089 NeedToCheckForLPCs.insert(DRE->getDecl());
12090 }
12091 }
12092 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12093 for (const Expr *Ref : C->varlist()) {
12094 if (!Ref->getType()->isScalarType())
12095 continue;
12096 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12097 if (!DRE)
12098 continue;
12099 NeedToCheckForLPCs.insert(DRE->getDecl());
12100 }
12101 }
12102 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12103 for (const Expr *Ref : C->varlist()) {
12104 if (!Ref->getType()->isScalarType())
12105 continue;
12106 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12107 if (!DRE)
12108 continue;
12109 NeedToCheckForLPCs.insert(DRE->getDecl());
12110 }
12111 }
12112 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12113 for (const Expr *Ref : C->varlist()) {
12114 if (!Ref->getType()->isScalarType())
12115 continue;
12116 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12117 if (!DRE)
12118 continue;
12119 NeedToCheckForLPCs.insert(DRE->getDecl());
12120 }
12121 }
12122 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12123 for (const Expr *Ref : C->varlist()) {
12124 if (!Ref->getType()->isScalarType())
12125 continue;
12126 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12127 if (!DRE)
12128 continue;
12129 NeedToCheckForLPCs.insert(DRE->getDecl());
12130 }
12131 }
12132 for (const Decl *VD : NeedToCheckForLPCs) {
12133 for (const LastprivateConditionalData &Data :
12134 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12135 if (Data.DeclToUniqueName.count(VD) > 0) {
12136 if (!Data.Disabled)
12137 NeedToAddForLPCsAsDisabled.insert(VD);
12138 break;
12139 }
12140 }
12141 }
12142}
12143
12144CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12145 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12146 : CGM(CGF.CGM),
12147 Action((CGM.getLangOpts().OpenMP >= 50 &&
12148 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12149 [](const OMPLastprivateClause *C) {
12150 return C->getKind() ==
12151 OMPC_LASTPRIVATE_conditional;
12152 }))
12153 ? ActionToDo::PushAsLastprivateConditional
12154 : ActionToDo::DoNotPush) {
12155 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12156 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12157 return;
12158 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12159 "Expected a push action.");
12160 LastprivateConditionalData &Data =
12161 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12162 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12163 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12164 continue;
12165
12166 for (const Expr *Ref : C->varlist()) {
12167 Data.DeclToUniqueName.insert(std::make_pair(
12168 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12169 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12170 }
12171 }
12172 Data.IVLVal = IVLVal;
12173 Data.Fn = CGF.CurFn;
12174}
12175
12176CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12177 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12178 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12179 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12180 if (CGM.getLangOpts().OpenMP < 50)
12181 return;
12182 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12183 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12184 if (!NeedToAddForLPCsAsDisabled.empty()) {
12185 Action = ActionToDo::DisableLastprivateConditional;
12186 LastprivateConditionalData &Data =
12187 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12188 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12189 Data.DeclToUniqueName.try_emplace(VD);
12190 Data.Fn = CGF.CurFn;
12191 Data.Disabled = true;
12192 }
12193}
12194
12195CGOpenMPRuntime::LastprivateConditionalRAII
12196CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12197 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12198 return LastprivateConditionalRAII(CGF, S);
12199}
12200
12201CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12202 if (CGM.getLangOpts().OpenMP < 50)
12203 return;
12204 if (Action == ActionToDo::DisableLastprivateConditional) {
12205 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12206 "Expected list of disabled private vars.");
12207 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12208 }
12209 if (Action == ActionToDo::PushAsLastprivateConditional) {
12210 assert(
12211 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12212 "Expected list of lastprivate conditional vars.");
12213 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12214 }
12215}
12216
12217Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12218 const VarDecl *VD) {
12219 ASTContext &C = CGM.getContext();
12220 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12221 QualType NewType;
12222 const FieldDecl *VDField;
12223 const FieldDecl *FiredField;
12224 LValue BaseLVal;
12225 auto VI = I->getSecond().find(VD);
12226 if (VI == I->getSecond().end()) {
12227 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12228 RD->startDefinition();
12229 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12230 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12231 RD->completeDefinition();
12232 NewType = C.getCanonicalTagType(RD);
12233 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12234 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12235 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12236 } else {
12237 NewType = std::get<0>(VI->getSecond());
12238 VDField = std::get<1>(VI->getSecond());
12239 FiredField = std::get<2>(VI->getSecond());
12240 BaseLVal = std::get<3>(VI->getSecond());
12241 }
12242 LValue FiredLVal =
12243 CGF.EmitLValueForField(BaseLVal, FiredField);
12244 CGF.EmitStoreOfScalar(
12245 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12246 FiredLVal);
12247 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
12248}
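// Layout note: the implicit record built above is equivalent to
//   struct lasprivate.conditional { <type of VD> value; char Fired; };
// where Fired starts at 0 and is set to 1 by inner regions that actually
// assign the variable (see checkAndEmitLastprivateConditional below).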
12249
12250namespace {
12251/// Checks if the lastprivate conditional variable is referenced in LHS.
12252class LastprivateConditionalRefChecker final
12253 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12254 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12255 const Expr *FoundE = nullptr;
12256 const Decl *FoundD = nullptr;
12257 StringRef UniqueDeclName;
12258 LValue IVLVal;
12259 llvm::Function *FoundFn = nullptr;
12260 SourceLocation Loc;
12261
12262public:
12263 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12264 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12265 llvm::reverse(LPM)) {
12266 auto It = D.DeclToUniqueName.find(E->getDecl());
12267 if (It == D.DeclToUniqueName.end())
12268 continue;
12269 if (D.Disabled)
12270 return false;
12271 FoundE = E;
12272 FoundD = E->getDecl()->getCanonicalDecl();
12273 UniqueDeclName = It->second;
12274 IVLVal = D.IVLVal;
12275 FoundFn = D.Fn;
12276 break;
12277 }
12278 return FoundE == E;
12279 }
12280 bool VisitMemberExpr(const MemberExpr *E) {
12281 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12282 return false;
12283 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12284 llvm::reverse(LPM)) {
12285 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12286 if (It == D.DeclToUniqueName.end())
12287 continue;
12288 if (D.Disabled)
12289 return false;
12290 FoundE = E;
12291 FoundD = E->getMemberDecl()->getCanonicalDecl();
12292 UniqueDeclName = It->second;
12293 IVLVal = D.IVLVal;
12294 FoundFn = D.Fn;
12295 break;
12296 }
12297 return FoundE == E;
12298 }
12299 bool VisitStmt(const Stmt *S) {
12300 for (const Stmt *Child : S->children()) {
12301 if (!Child)
12302 continue;
12303 if (const auto *E = dyn_cast<Expr>(Child))
12304 if (!E->isGLValue())
12305 continue;
12306 if (Visit(Child))
12307 return true;
12308 }
12309 return false;
12310 }
12311 explicit LastprivateConditionalRefChecker(
12312 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12313 : LPM(LPM) {}
12314 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12315 getFoundData() const {
12316 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12317 }
12318};
12319} // namespace
12320
12321void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12322 LValue IVLVal,
12323 StringRef UniqueDeclName,
12324 LValue LVal,
12325 SourceLocation Loc) {
12326 // Last updated loop counter for the lastprivate conditional var.
12327 // int<xx> last_iv = 0;
12328 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12329 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12330 LLIVTy, getName({UniqueDeclName, "iv"}));
12331 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12332 IVLVal.getAlignment().getAsAlign());
12333 LValue LastIVLVal =
12334 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
12335
12336 // Last value of the lastprivate conditional.
12337 // decltype(priv_a) last_a;
12338 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12339 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12340 cast<llvm::GlobalVariable>(Last)->setAlignment(
12341 LVal.getAlignment().getAsAlign());
12342 LValue LastLVal =
12343 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12344
12345 // Global loop counter. Required to handle inner parallel-for regions.
12346 // iv
12347 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12348
12349 // #pragma omp critical(a)
12350 // if (last_iv <= iv) {
12351 // last_iv = iv;
12352 // last_a = priv_a;
12353 // }
12354 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12355 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12356 Action.Enter(CGF);
12357 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12358 // (last_iv <= iv) ? Check if the variable is updated and store new
12359 // value in global var.
12360 llvm::Value *CmpRes;
12361 if (IVLVal.getType()->isSignedIntegerType()) {
12362 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12363 } else {
12364 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12365 "Loop iteration variable must be integer.");
12366 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12367 }
12368 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12369 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12370 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12371 // {
12372 CGF.EmitBlock(ThenBB);
12373
12374 // last_iv = iv;
12375 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12376
12377 // last_a = priv_a;
12378 switch (CGF.getEvaluationKind(LVal.getType())) {
12379 case TEK_Scalar: {
12380 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12381 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12382 break;
12383 }
12384 case TEK_Complex: {
12385 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12386 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12387 break;
12388 }
12389 case TEK_Aggregate:
12390 llvm_unreachable(
12391 "Aggregates are not supported in lastprivate conditional.");
12392 }
12393 // }
12394 CGF.EmitBranch(ExitBB);
12395 // There is no need to emit line number for unconditional branch.
12396 (void)ApplyDebugLocation::CreateEmpty(CGF);
12397 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12398 };
12399
12400 if (CGM.getLangOpts().OpenMPSimd) {
12401 // Do not emit as a critical region as no parallel region could be emitted.
12402 RegionCodeGenTy ThenRCG(CodeGen);
12403 ThenRCG(CGF);
12404 } else {
12405 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12406 }
12407}
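// Illustrative sketch (not part of this file): a conditional lastprivate whose
// final value must come from the last iteration that actually assigns it,
// which is what the last_iv/last_a bookkeeping above implements. P and f are
// placeholders for the example.
//
//   #pragma omp parallel for lastprivate(conditional: x)
//   for (int i = 0; i < N; ++i)
//     if (P(i))
//       x = f(i); // only iterations that fire update the shared copy of x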
12408
12409void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12410 const Expr *LHS) {
12411 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12412 return;
12413 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12414 if (!Checker.Visit(LHS))
12415 return;
12416 const Expr *FoundE;
12417 const Decl *FoundD;
12418 StringRef UniqueDeclName;
12419 LValue IVLVal;
12420 llvm::Function *FoundFn;
12421 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12422 Checker.getFoundData();
12423 if (FoundFn != CGF.CurFn) {
12424 // Special codegen for inner parallel regions.
12425 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12426 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12427 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12428 "Lastprivate conditional is not found in outer region.");
12429 QualType StructTy = std::get<0>(It->getSecond());
12430 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12431 LValue PrivLVal = CGF.EmitLValue(FoundE);
12432 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12433 PrivLVal.getAddress(),
12434 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12435 CGF.ConvertTypeForMem(StructTy));
12436 LValue BaseLVal =
12437 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12438 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12439 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12440 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12441 FiredLVal, llvm::AtomicOrdering::Unordered,
12442 /*IsVolatile=*/true, /*isInit=*/false);
12443 return;
12444 }
12445
12446 // Private address of the lastprivate conditional in the current context.
12447 // priv_a
12448 LValue LVal = CGF.EmitLValue(FoundE);
12449 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12450 FoundE->getExprLoc());
12451}
12452
12453void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12454 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12455 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12456 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12457 return;
12458 auto Range = llvm::reverse(LastprivateConditionalStack);
12459 auto It = llvm::find_if(
12460 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12461 if (It == Range.end() || It->Fn != CGF.CurFn)
12462 return;
12463 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12464 assert(LPCI != LastprivateConditionalToTypes.end() &&
12465 "Lastprivates must be registered already.");
12466 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12467 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12468 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12469 for (const auto &Pair : It->DeclToUniqueName) {
12470 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12471 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12472 continue;
12473 auto I = LPCI->getSecond().find(Pair.first);
12474 assert(I != LPCI->getSecond().end() &&
12475 "Lastprivate must be registered already.");
12476 // bool Cmp = priv_a.Fired != 0;
12477 LValue BaseLVal = std::get<3>(I->getSecond());
12478 LValue FiredLVal =
12479 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12480 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12481 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12482 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12483 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12484 // if (Cmp) {
12485 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12486 CGF.EmitBlock(ThenBB);
12487 Address Addr = CGF.GetAddrOfLocalVar(VD);
12488 LValue LVal;
12489 if (VD->getType()->isReferenceType())
12490 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12491 AlignmentSource::Decl);
12492 else
12493 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12494 AlignmentSource::Decl);
12495 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12496 D.getBeginLoc());
12497 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12498 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12499 // }
12500 }
12501}
12502
12503void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12504 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12505 SourceLocation Loc) {
12506 if (CGF.getLangOpts().OpenMP < 50)
12507 return;
12508 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12509 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12510 "Unknown lastprivate conditional variable.");
12511 StringRef UniqueName = It->second;
12512 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12513 // The variable was not updated in the region - exit.
12514 if (!GV)
12515 return;
12516 LValue LPLVal = CGF.MakeRawAddrLValue(
12517 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12518 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12519 CGF.EmitStoreOfScalar(Res, PrivLVal);
12520}
12521
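// The CGOpenMPSIMDRuntime overrides below back -fopenmp-simd mode, in which
// only 'simd' semantics are honored and no libomp calls may be emitted.
// Reaching most of these entry points would be a front-end bug, hence the
// llvm_unreachable bodies; the few reachable ones (e.g. emitReduction,
// emitTargetGlobal) handle the simd-only subset directly.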
12522llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12523 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12524 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12525 const RegionCodeGenTy &CodeGen) {
12526 llvm_unreachable("Not supported in SIMD-only mode");
12527}
12528
12529llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12530 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12531 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12532 const RegionCodeGenTy &CodeGen) {
12533 llvm_unreachable("Not supported in SIMD-only mode");
12534}
12535
12536llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12537 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12538 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12539 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12540 bool Tied, unsigned &NumberOfParts) {
12541 llvm_unreachable("Not supported in SIMD-only mode");
12542}
12543
12544void CGOpenMPSIMDRuntime::emitParallelCall(
12545 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
12546 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
12547 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
12548 OpenMPSeverityClauseKind Severity, const Expr *Message) {
12549 llvm_unreachable("Not supported in SIMD-only mode");
12550}
12551
12552void CGOpenMPSIMDRuntime::emitCriticalRegion(
12553 CodeGenFunction &CGF, StringRef CriticalName,
12554 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12555 const Expr *Hint) {
12556 llvm_unreachable("Not supported in SIMD-only mode");
12557}
12558
12559void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12560 const RegionCodeGenTy &MasterOpGen,
12561 SourceLocation Loc) {
12562 llvm_unreachable("Not supported in SIMD-only mode");
12563}
12564
12565void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12566 const RegionCodeGenTy &MasterOpGen,
12567 SourceLocation Loc,
12568 const Expr *Filter) {
12569 llvm_unreachable("Not supported in SIMD-only mode");
12570}
12571
12572void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12573 SourceLocation Loc) {
12574 llvm_unreachable("Not supported in SIMD-only mode");
12575}
12576
12577void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12578 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12579 SourceLocation Loc) {
12580 llvm_unreachable("Not supported in SIMD-only mode");
12581}
12582
12583void CGOpenMPSIMDRuntime::emitSingleRegion(
12584 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12585 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12586 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12587 ArrayRef<const Expr *> AssignmentOps) {
12588 llvm_unreachable("Not supported in SIMD-only mode");
12589}
12590
12591void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12592 const RegionCodeGenTy &OrderedOpGen,
12593 SourceLocation Loc,
12594 bool IsThreads) {
12595 llvm_unreachable("Not supported in SIMD-only mode");
12596}
12597
12598void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12599 SourceLocation Loc,
12600 OpenMPDirectiveKind Kind,
12601 bool EmitChecks,
12602 bool ForceSimpleCall) {
12603 llvm_unreachable("Not supported in SIMD-only mode");
12604}
12605
12606void CGOpenMPSIMDRuntime::emitForDispatchInit(
12607 CodeGenFunction &CGF, SourceLocation Loc,
12608 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12609 bool Ordered, const DispatchRTInput &DispatchValues) {
12610 llvm_unreachable("Not supported in SIMD-only mode");
12611}
12612
12613void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
12614 SourceLocation Loc) {
12615 llvm_unreachable("Not supported in SIMD-only mode");
12616}
12617
12618void CGOpenMPSIMDRuntime::emitForStaticInit(
12619 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12620 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12621 llvm_unreachable("Not supported in SIMD-only mode");
12622}
12623
12624void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12625 CodeGenFunction &CGF, SourceLocation Loc,
12626 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12627 llvm_unreachable("Not supported in SIMD-only mode");
12628}
12629
12630void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12631 SourceLocation Loc,
12632 unsigned IVSize,
12633 bool IVSigned) {
12634 llvm_unreachable("Not supported in SIMD-only mode");
12635}
12636
12637void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12638 SourceLocation Loc,
12639 OpenMPDirectiveKind DKind) {
12640 llvm_unreachable("Not supported in SIMD-only mode");
12641}
12642
12643llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12644 SourceLocation Loc,
12645 unsigned IVSize, bool IVSigned,
12646 Address IL, Address LB,
12647 Address UB, Address ST) {
12648 llvm_unreachable("Not supported in SIMD-only mode");
12649}
12650
12651void CGOpenMPSIMDRuntime::emitNumThreadsClause(
12652 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
12653 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
12654 SourceLocation SeverityLoc, const Expr *Message,
12655 SourceLocation MessageLoc) {
12656 llvm_unreachable("Not supported in SIMD-only mode");
12657}
12658
12659void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12660 ProcBindKind ProcBind,
12661 SourceLocation Loc) {
12662 llvm_unreachable("Not supported in SIMD-only mode");
12663}
12664
12665Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12666 const VarDecl *VD,
12667 Address VDAddr,
12668 SourceLocation Loc) {
12669 llvm_unreachable("Not supported in SIMD-only mode");
12670}
12671
12672llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12673 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12674 CodeGenFunction *CGF) {
12675 llvm_unreachable("Not supported in SIMD-only mode");
12676}
12677
12678Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12679 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12680 llvm_unreachable("Not supported in SIMD-only mode");
12681}
12682
12683void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12684 ArrayRef<const Expr *> Vars,
12685 SourceLocation Loc,
12686 llvm::AtomicOrdering AO) {
12687 llvm_unreachable("Not supported in SIMD-only mode");
12688}
12689
12690void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12691 const OMPExecutableDirective &D,
12692 llvm::Function *TaskFunction,
12693 QualType SharedsTy, Address Shareds,
12694 const Expr *IfCond,
12695 const OMPTaskDataTy &Data) {
12696 llvm_unreachable("Not supported in SIMD-only mode");
12697}
12698
12699void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12700 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12701 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12702 const Expr *IfCond, const OMPTaskDataTy &Data) {
12703 llvm_unreachable("Not supported in SIMD-only mode");
12704}
12705
12706void CGOpenMPSIMDRuntime::emitReduction(
12707 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12708 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12709 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12710 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12711 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12712 ReductionOps, Options);
12713}
12714
12715llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12716 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12717 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12718 llvm_unreachable("Not supported in SIMD-only mode");
12719}
12720
12721void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12722 SourceLocation Loc,
12723 bool IsWorksharingReduction) {
12724 llvm_unreachable("Not supported in SIMD-only mode");
12725}
12726
12727void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12728 SourceLocation Loc,
12729 ReductionCodeGen &RCG,
12730 unsigned N) {
12731 llvm_unreachable("Not supported in SIMD-only mode");
12732}
12733
12734Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12735 SourceLocation Loc,
12736 llvm::Value *ReductionsPtr,
12737 LValue SharedLVal) {
12738 llvm_unreachable("Not supported in SIMD-only mode");
12739}
12740
12741void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12742 SourceLocation Loc,
12743 const OMPTaskDataTy &Data) {
12744 llvm_unreachable("Not supported in SIMD-only mode");
12745}
12746
12747void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12748 CodeGenFunction &CGF, SourceLocation Loc,
12749 OpenMPDirectiveKind CancelRegion) {
12750 llvm_unreachable("Not supported in SIMD-only mode");
12751}
12752
12753void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12754 SourceLocation Loc, const Expr *IfCond,
12755 OpenMPDirectiveKind CancelRegion) {
12756 llvm_unreachable("Not supported in SIMD-only mode");
12757}
12758
12759void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12760 const OMPExecutableDirective &D, StringRef ParentName,
12761 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12762 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12763 llvm_unreachable("Not supported in SIMD-only mode");
12764}
12765
12766void CGOpenMPSIMDRuntime::emitTargetCall(
12767 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12768 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12769 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12770 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12771 const OMPLoopDirective &D)>
12772 SizeEmitter) {
12773 llvm_unreachable("Not supported in SIMD-only mode");
12774}
12775
12776bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12777 llvm_unreachable("Not supported in SIMD-only mode");
12778}
12779
12780bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12781 llvm_unreachable("Not supported in SIMD-only mode");
12782}
12783
12784bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12785 return false;
12786}
12787
12788void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12789 const OMPExecutableDirective &D,
12790 SourceLocation Loc,
12791 llvm::Function *OutlinedFn,
12792 ArrayRef<llvm::Value *> CapturedVars) {
12793 llvm_unreachable("Not supported in SIMD-only mode");
12794}
12795
12796void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12797 const Expr *NumTeams,
12798 const Expr *ThreadLimit,
12799 SourceLocation Loc) {
12800 llvm_unreachable("Not supported in SIMD-only mode");
12801}
12802
12803void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12804 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12805 const Expr *Device, const RegionCodeGenTy &CodeGen,
12806 CGOpenMPRuntime::TargetDataInfo &Info) {
12807 llvm_unreachable("Not supported in SIMD-only mode");
12808}
12809
12810void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12811 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12812 const Expr *Device) {
12813 llvm_unreachable("Not supported in SIMD-only mode");
12814}
12815
12816void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12817 const OMPLoopDirective &D,
12818 ArrayRef<Expr *> NumIterations) {
12819 llvm_unreachable("Not supported in SIMD-only mode");
12820}
12821
12822void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12823 const OMPDependClause *C) {
12824 llvm_unreachable("Not supported in SIMD-only mode");
12825}
12826
12827void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12828 const OMPDoacrossClause *C) {
12829 llvm_unreachable("Not supported in SIMD-only mode");
12830}
12831
12832const VarDecl *
12833CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12834 const VarDecl *NativeParam) const {
12835 llvm_unreachable("Not supported in SIMD-only mode");
12836}
12837
12838Address
12839CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12840 const VarDecl *NativeParam,
12841 const VarDecl *TargetParam) const {
12842 llvm_unreachable("Not supported in SIMD-only mode");
12843}
#define V(N, I)
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static llvm::Value * emitDynCGGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
Expr::Classification Cl
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:220
SourceManager & getSourceManager()
Definition ASTContext.h:833
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:926
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static bool hasSameType(QualType T1, QualType T2)
Determine whether the given types T1 and T2 are equivalent.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5263
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3722
Attr - This represents one attribute.
Definition Attr.h:44
Represents a C++ constructor within a class.
Definition DeclCXX.h:2604
Represents a C++ destructor within a class.
Definition DeclCXX.h:2869
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2255
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2279
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1784
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1097
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2121
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3899
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3933
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1353
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:3939
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3927
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3930
This captures a statement into a function.
Definition Stmt.h:3886
const Capture * const_capture_iterator
Definition Stmt.h:4020
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4037
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:4007
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:3990
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1479
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4032
capture_range captures()
Definition Stmt.h:4024
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:140
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:296
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:207
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:245
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:112
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:369
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:282
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
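For example, a num_threads clause on a parallel construct flows through this hook; a sketch of the source form and the call sequence it lowers to (the call shapes in the comment are paraphrased, not exact IR):

void example() {
#pragma omp parallel num_threads(4)
  {
    // Body is outlined into an omp_outlined helper; the emitted shape is
    // roughly:
    //   __kmpc_push_num_threads(&loc, __kmpc_global_thread_num(&loc), 4);
    //   __kmpc_fork_call(&loc, /*argc=*/0, omp_outlined);
  }
}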
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return a threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits a call to Callee with arguments Args at location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits an object of ident_t type with info for the source location.
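The ident_t layout referred to here, paraphrased from libomp's kmp.h (a sketch for orientation, not an ABI contract):

typedef struct ident {
  kmp_int32 reserved_1; // unused by clang-generated code
  kmp_int32 flags;      // KMP_IDENT_* flags describing the call site
  kmp_int32 reserved_2;
  kmp_int32 reserved_3;
  char const *psource;  // ";file;function;line;column;;" when locations are emitted
} ident_t;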
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in an untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for a teams call of the OutlinedFn with variables captured in a record whose address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit code for the initialization of a threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in the finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void *shareds; /**< pointer to block of pointers to shared vars */ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument based on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit code for the initialization of the task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit the __kmpc_error call for the error directive: extern void __kmpc_error(ident_t *loc, int severity,...
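A source-level sketch of what reaches emitErrorCall; the severity encoding in the comment (1 = warning, 2 = fatal) paraphrases the libomp runtime and should be treated as an assumption:

void configure(int n) {
  if (n < 0) {
    // OpenMP 5.1 'error' directive; lowered roughly to
    //   __kmpc_error(&loc, /*severity=*/1, "n should be >= 0");
#pragma omp error at(execution) severity(warning) message("n should be >= 0")
  }
}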
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with the current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
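The parallel if clause is the archetypal client: ThenGen emits the forked call and ElseGen the serialized fallback. A sketch (work_chunk is a hypothetical helper; the runtime calls in the comment paraphrase libomp):

void work_chunk() { /* per-thread work */ }

void maybeParallel(int n) {
  // ThenGen: __kmpc_fork_call(...); ElseGen: __kmpc_serialized_parallel(...),
  // a direct call of the outlined body, __kmpc_end_serialized_parallel(...).
#pragma omp parallel if(n > 1000)
  work_chunk();
}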
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits a list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as a nontemporal declaration in the current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc)
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before the start of the loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
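A doacross loop that exercises both doacross hooks; the mapping of sink/source onto the __kmpc_doacross_wait/__kmpc_doacross_post entry points paraphrases libomp and should be read as a sketch:

void wavefront(double *a, int n) {
  // emitDoacrossInit registers the loop dimensions via __kmpc_doacross_init;
  // depend(sink:...) waits for, and depend(source) posts, an iteration vector.
#pragma omp parallel for ordered(1)
  for (int i = 1; i < n; ++i) {
#pragma omp ordered depend(sink : i - 1)
    a[i] += a[i - 1];
#pragma omp ordered depend(source)
  }
}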
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clause.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit the outlined function for the 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
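A threadprivate definition whose dynamic initialization goes through these paths; the registration call in the comment paraphrases the libomp entry point:

struct Counter {
  Counter() : v(0) {}
  ~Counter() {}
  int v;
};
static Counter c;
#pragma omp threadprivate(c)
// For this definition the generated code registers per-thread construction
// and destruction thunks, roughly:
//   __kmpc_threadprivate_register(&loc, &c, ctor, cctor, dtor);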
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit code for the reduction clause.
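A reduction whose combiner lands in this hook; the runtime arbitration calls named in the comment paraphrase libomp:

int sumArray(const int *a, int n) {
  int sum = 0;
  // Private copies of 'sum' are combined by a generated reduce function;
  // arbitration goes through __kmpc_reduce_nowait / __kmpc_end_reduce_nowait.
#pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < n; ++i)
    sum += a[i];
  return sum;
}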
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits a list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
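A task with dependences, each list item becoming one kmp_depend_info entry; the __kmpc_omp_task_with_deps name in the comment paraphrases the libomp entry point:

void pipeline(int &x, int &y) {
  // Each depend item is recorded as {base_addr, len, flags{in,out,...}} in a
  // dependence array handed to __kmpc_omp_task_with_deps with the new task.
#pragma omp task depend(in : x) depend(out : y) shared(x, y)
  y = x + 1;
}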
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before the start of the loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before the start of the loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for the initialization of a threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for a parallel or serial call of the OutlinedFn with variables captured in a record whose ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit code for the reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for a teams call of the OutlinedFn with variables captured in a record whose address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with the current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for the initialization of the task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit the outlined function for the 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of the outlined function if this is required for the target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers the LocalVD variable as private, with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3067
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2394
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
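These block helpers are typically combined in the pattern below, which mirrors the shape of emitIfClause (a sketch; the function name is hypothetical and CGF/Cond are assumed to be supplied by the caller):

void emitIfThenElse(clang::CodeGen::CodeGenFunction &CGF,
                    const clang::Expr *Cond) {
  llvm::BasicBlock *ThenBB = CGF.createBasicBlock("if.then");
  llvm::BasicBlock *ElseBB = CGF.createBasicBlock("if.else");
  llvm::BasicBlock *ContBB = CGF.createBasicBlock("if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBB, ElseBB, /*TrueCount=*/0);
  CGF.EmitBlock(ThenBB);
  // ... emit the 'then' statements ...
  CGF.EmitBranch(ContBB);
  CGF.EmitBlock(ElseBB);
  // ... emit the 'else' statements ...
  CGF.EmitBranch(ContBB);
  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
}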
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1482
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2278
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3076
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2251
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5289
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:176
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:244
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2373
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:4872
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:225
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5463
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2202
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2570
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3086
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:295
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1552
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:675
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:188
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
Definition CGExpr.cpp:3034
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:203
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1610
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitMemberExpr(const MemberExpr *E)
Definition CGExpr.cpp:5104
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1668
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1807
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:655
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2092
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2944
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition CGCall.cpp:1701
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:739
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:639
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375
LValue - This represents an lvalue reference.
Definition CGValue.h:182
CharUnits getAlignment() const
Definition CGValue.h:343
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:338
Address getAddress() const
Definition CGValue.h:361
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:346
QualType getType() const
Definition CGValue.h:291
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:335
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:98
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:71
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kind of the reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variably-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used instead of the original variable address in normal operations.
Class that provides a way to call the simple version of codegen for an OpenMP region, or the advanced one with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1449
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1270
ValueDecl * getDecl()
Definition Expr.h:1338
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:573
bool hasAttrs() const
Definition DeclBase.h:518
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:546
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition DeclBase.h:1087
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:559
SourceLocation getLocation() const
Definition DeclBase.h:439
DeclContext * getDeclContext()
Definition DeclBase.h:448
AttrVec & getAttrs()
Definition DeclBase.h:524
bool hasAttr() const
Definition DeclBase.h:577
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:978
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition Diagnostic.h:905
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
Expr * IgnoreParenNoopCasts(const ASTContext &Ctx) LLVM_READONLY
Skip past any parentheses and casts which do not change the value (including ptr->int casts of the sa...
Definition Expr.cpp:3112
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:674
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:672
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3090
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3085
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3665
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:273
static bool isSameComparisonOperand(const Expr *E1, const Expr *E2)
Checks that the two Expr's will refer to the same value as a comparison operand.
Definition Expr.cpp:4294
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4030
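A sketch of the usual constant-folding pattern built from these helpers, assuming an Expr *E and an ASTContext &Ctx:

  const Expr *Inner = E->IgnoreParenImpCasts();
  if (std::optional<llvm::APSInt> V = Inner->getIntegerConstantExpr(Ctx)) {
    // E folded to the integer *V.
  } else if (bool Cond; E->EvaluateAsBooleanCondition(Cond, Ctx)) {
    // E is a foldable boolean condition with value Cond.
  }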
Represents a member of a struct/union/class.
Definition Decl.h:3160
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition Decl.h:3245
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3396
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4689
Represents a function declaration or definition.
Definition Decl.h:2000
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2797
QualType getReturnType() const
Definition Decl.h:2845
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2774
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3736
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3815
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5522
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:971
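For example, materializing the constant 1 as an AST node (a sketch, assuming an ASTContext &C):

  llvm::APInt One(C.getIntWidth(C.IntTy), /*val=*/1);
  Expr *Lit = IntegerLiteral::Create(C, One, C.IntTy, SourceLocation());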
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3298
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3381
Expr * getBase() const
Definition Expr.h:3375
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
bool isExternallyVisible() const
Definition Decl.h:433
This represents clause 'affinity' in the '#pragma omp task'-based directives.
ArrayRef< MappableComponent > MappableExprComponentListRef
static std::pair< const Expr *, std::optional< size_t > > findAttachPtrExpr(MappableExprComponentListRef Components, OpenMPDirectiveKind CurDirKind)
Find the attach pointer expression from a list of mappable expression components.
static QualType getComponentExprElementType(const Expr *Exp)
Get the type of an element of a ComponentList Expr Exp.
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing a single OpenMP clause.
This represents '#pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents '#pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
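A sketch of inspecting such a directive, assuming an OMPDeclareReductionDecl *DRD; the combiner is expressed in terms of the getCombinerIn()/getCombinerOut() placeholders, and the initializer in terms of getInitOrig()/getInitPriv():

  if (Expr *Combiner = DRD->getCombiner()) {
    // The combiner reads getCombinerIn() and writes getCombinerOut().
  }
  if (Expr *Init = DRD->getInitializer()) {
    // getInitializerKind() distinguishes call-style from direct/copy
    // initialization of the getInitPriv() placeholder.
  }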
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This represents 'if' clause in the '#pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
This represents clause 'in_reduction' in the '#pragma omp task' directives.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5471
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1178
Represents a parameter to a function.
Definition Decl.h:1790
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3328
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1172
QualType withRestrict() const
Definition TypeBase.h:1175
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8278
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8318
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8463
QualType getCanonicalType() const
Definition TypeBase.h:8330
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1545
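A sketch of common query chains on a QualType Ty (assumed in scope):

  if (!Ty.isNull()) {
    QualType Val = Ty.getNonReferenceType();   // 'const int &' -> 'const int'
    QualType Canon = Val.getCanonicalType();   // strip sugar for comparisons
    if (Val.isDestructedType()) {
      // Non-trivial destruction: a cleanup must be emitted for this object.
    }
  }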
Represents a struct/union/class.
Definition Decl.h:4312
field_iterator field_end() const
Definition Decl.h:4518
field_range fields() const
Definition Decl.h:4515
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5218
bool field_empty() const
Definition Decl.h:4523
field_iterator field_begin() const
Definition Decl.cpp:5202
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
Stmt - This represents one statement.
Definition Stmt.h:85
child_range children()
Definition Stmt.cpp:299
StmtClass getStmtClass() const
Definition Stmt.h:1472
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:338
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition Stmt.cpp:205
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:350
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4895
bool isUnion() const
Definition Decl.h:3922
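Taken together with FieldDecl::Create and DeclContext::addDecl above, these support the usual pattern for building an implicit record; a sketch, assuming an ASTContext &C and a freshly created RecordDecl *RD:

  RD->startDefinition();
  FieldDecl *FD = FieldDecl::Create(
      C, RD, SourceLocation(), SourceLocation(), /*Id=*/nullptr, C.IntTy,
      C.getTrivialTypeSourceInfo(C.IntTy, SourceLocation()), /*BW=*/nullptr,
      /*Mutable=*/false, ICIS_NoInit);
  FD->setAccess(AS_public);
  RD->addDecl(FD);
  RD->completeDefinition();   // layout may be queried only after this point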
The base class of the type hierarchy.
Definition TypeBase.h:1833
bool isVoidType() const
Definition TypeBase.h:8871
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type; if this is an array type, return the array element type.
Definition TypeBase.h:9051
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2205
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8614
bool isPointerType() const
Definition TypeBase.h:8515
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8915
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9158
bool isReferenceType() const
Definition TypeBase.h:8539
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:752
bool isLValueReferenceType() const
Definition TypeBase.h:8543
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2411
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3119
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9044
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2800
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9144
bool isFloatingType() const
Definition Type.cpp:2304
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2253
bool isAnyPointerType() const
Definition TypeBase.h:8523
const T * getAs() const
Member-template getAs<specific type>.
Definition TypeBase.h:9091
bool isRecordType() const
Definition TypeBase.h:8642
bool isUnionType() const
Definition Type.cpp:718
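A sketch of these predicates in combination, assuming a QualType Ty:

  const Type *T = Ty.getTypePtr();
  if (T->isAnyPointerType()) {
    QualType Pointee = T->getPointeeType();  // also covers ObjC/block pointers
  }
  if (T->isVariablyModifiedType()) {
    // A VLA: its size expression must be emitted before the type is usable.
  }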
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it is a function designator), or an enum constant.
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:926
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2257
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2366
const Expr * getInit() const
Definition Decl.h:1368
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1217
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1295
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2375
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1262
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1358
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:3966
Expr * getSizeExpr() const
Definition TypeBase.h:3980
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:154
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:145
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
ComparisonResult
Indicates the result of a tentative comparison.
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
bool isa(CodeGen::Address addr)
Definition Address.h:330
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
@ Conditional
A conditional (?:) operator.
Definition Sema.h:667
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:124
This class represents a compute construct, representing a 'Kind' of 'parallel', 'serial',...
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
static bool classof(const Stmt *T)
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
const FunctionProtoType * T
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:5886
bool isOpenMPTargetMapEnteringDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a map-entering target directive.
@ Type
The name was classified as a type.
Definition Sema.h:562
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:139
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1746
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, necessary to generate the static_chunked scheduled loop.
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
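A sketch of populating this struct for a 32-bit signed, unordered loop; the IL/LB/UB/ST addresses are assumed to have been allocated by the loop codegen, and the constructor is assumed to take (IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk):

  CGOpenMPRuntime::StaticRTInput StaticInit(
      /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL, LB, UB, ST,
      /*Chunk=*/nullptr);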
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:645
Extra information about a function prototype.
Definition TypeBase.h:5339
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition ExprOpenMP.h:121
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Scheduling data for loop-based OpenMP directives.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
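A sketch of filling this descriptor (OpenMPScheduleTy, assumed to be the struct these members belong to) when no schedule() clause is present:

  OpenMPScheduleTy ScheduleKind;
  ScheduleKind.Schedule = OMPC_SCHEDULE_unknown;    // no schedule() clause seen
  ScheduleKind.M1 = OMPC_SCHEDULE_MODIFIER_unknown;
  ScheduleKind.M2 = OMPC_SCHEDULE_MODIFIER_unknown;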
Describes how types, statements, expressions, and declarations should be printed.