//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
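        // Map part id 0 to this first ".untied.jmp." block: it is where
        // execution of the task body initially starts.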
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
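/// For example, the ident_t emitted for the implicit barrier at the end of a
/// worksharing 'for' region has OMP_IDENT_BARRIER_IMPL_FOR set in its flags.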
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon separated
///                                 fields which describe the source file,
///                                 the function and a pair of line numbers that
///                                 delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
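/// A schedule modifier, when present, is OR'ed into the schedule value, e.g.
/// 'schedule(nonmonotonic: dynamic)' is encoded as
/// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic.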
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.EmitStmt(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  Config.setDefaultTargetAS(
      CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));

  OMPBuilder.setConfig(Config);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
                                     CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});

  // The user forces the compiler to behave as if 'omp requires
  // unified_shared_memory' had been specified.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean up non-target variable declarations that are possibly used only in
  // debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
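  // The resulting ident string has the form
  // ";<file>;<function>;<line>;<column>;;".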
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
    for (const auto *D : I->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(I);
  }
  if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}

static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    if (!CGM.getFileSystem()->exists(PLoc.getFilename()))
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack,
                                             *CGM.getFileSystem(), ParentName);
}
1554
1555 ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1556 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1557
1558 auto LinkageForVariable = [&VD, this]() {
1559 return CGM.getLLVMLinkageVarDefinition(VD);
1560 };
1561
1562 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1563
1564 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1565 CGM.getContext().getPointerType(VD->getType()));
1566 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1567 convertCaptureClause(VD), convertDeviceClause(VD),
1568 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1569 VD->isExternallyVisible(),
1570 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1571 VD->getCanonicalDecl()->getBeginLoc()),
1572 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1573 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1574 LinkageForVariable);
1575
1576 if (!addr)
1577 return ConstantAddress::invalid();
1578 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1579}
1580
1581llvm::Constant *
1582 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1583 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1584 !CGM.getContext().getTargetInfo().isTLSSupported());
1585 // Lookup the entry, lazily creating it if necessary.
1586 std::string Suffix = getName({"cache", ""});
1587 return OMPBuilder.getOrCreateInternalVariable(
1588 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1589}
1590
1591 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1592 const VarDecl *VD,
1593 Address VDAddr,
1594 SourceLocation Loc) {
1595 if (CGM.getLangOpts().OpenMPUseTLS &&
1596 CGM.getContext().getTargetInfo().isTLSSupported())
1597 return VDAddr;
1598
1599 llvm::Type *VarTy = VDAddr.getElementType();
1600 llvm::Value *Args[] = {
1601 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1602 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1603 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1604 getOrCreateThreadPrivateCache(VD)};
1605 return Address(
1606 CGF.EmitRuntimeCall(
1607 OMPBuilder.getOrCreateRuntimeFunction(
1608 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1609 Args),
1610 CGF.Int8Ty, VDAddr.getAlignment());
1611}
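// Illustrative sketch (not from the original source): when native TLS is not
// used, `int x; #pragma omp threadprivate(x)` makes each access of x go
// through roughly
//   ptr = __kmpc_threadprivate_cached(&loc, gtid, &x, sizeof(x), &x.cache.);
// where `x.cache.` stands for the internal cache variable created by
// getOrCreateThreadPrivateCache(); the exact name is mangled.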
1612
1613 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1614 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1615 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1616 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1617 // library.
1618 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1619 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1620 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1621 OMPLoc);
1622 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1623 // to register constructor/destructor for variable.
1624 llvm::Value *Args[] = {
1625 OMPLoc,
1626 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1627 Ctor, CopyCtor, Dtor};
1628 CGF.EmitRuntimeCall(
1629 OMPBuilder.getOrCreateRuntimeFunction(
1630 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1631 Args);
1632}
1633
1634 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1635 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1636 bool PerformInit, CodeGenFunction *CGF) {
1637 if (CGM.getLangOpts().OpenMPUseTLS &&
1638 CGM.getContext().getTargetInfo().isTLSSupported())
1639 return nullptr;
1640
1641 VD = VD->getDefinition(CGM.getContext());
1642 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1643 QualType ASTTy = VD->getType();
1644
1645 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1646 const Expr *Init = VD->getAnyInitializer();
1647 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1648 // Generate function that re-emits the declaration's initializer into the
1649 // threadprivate copy of the variable VD
1650 CodeGenFunction CtorCGF(CGM);
1651 FunctionArgList Args;
1652 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1653 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1654 ImplicitParamKind::Other);
1655 Args.push_back(&Dst);
1656
1657 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1658 CGM.getContext().VoidPtrTy, Args);
1659 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1660 std::string Name = getName({"__kmpc_global_ctor_", ""});
1661 llvm::Function *Fn =
1662 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1663 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1664 Args, Loc, Loc);
1665 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1666 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1667 CGM.getContext().VoidPtrTy, Dst.getLocation());
1668 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1669 VDAddr.getAlignment());
1670 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1671 /*IsInitializer=*/true);
1672 ArgVal = CtorCGF.EmitLoadOfScalar(
1673 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1674 CGM.getContext().VoidPtrTy, Dst.getLocation());
1675 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1676 CtorCGF.FinishFunction();
1677 Ctor = Fn;
1678 }
1679 if (VD->getType().isDestructedType() != QualType::DK_none) {
1680 // Generate function that emits destructor call for the threadprivate copy
1681 // of the variable VD
1682 CodeGenFunction DtorCGF(CGM);
1683 FunctionArgList Args;
1684 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1685 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1686 ImplicitParamKind::Other);
1687 Args.push_back(&Dst);
1688
1689 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1690 CGM.getContext().VoidTy, Args);
1691 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1692 std::string Name = getName({"__kmpc_global_dtor_", ""});
1693 llvm::Function *Fn =
1694 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1695 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1696 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1697 Loc, Loc);
1698 // Create a scope with an artificial location for the body of this function.
1699 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1700 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1701 DtorCGF.GetAddrOfLocalVar(&Dst),
1702 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1703 DtorCGF.emitDestroy(
1704 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1705 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1706 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1707 DtorCGF.FinishFunction();
1708 Dtor = Fn;
1709 }
1710 // Do not emit init function if it is not required.
1711 if (!Ctor && !Dtor)
1712 return nullptr;
1713
1714 // Copying constructor for the threadprivate variable.
1715 // Must be NULL - reserved by the runtime, which currently requires that
1716 // this parameter always be NULL; otherwise it fires an assertion.
1717 CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1718 if (Ctor == nullptr) {
1719 Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1720 }
1721 if (Dtor == nullptr) {
1722 Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1723 }
1724 if (!CGF) {
1725 auto *InitFunctionTy =
1726 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1727 std::string Name = getName({"__omp_threadprivate_init_", ""});
1728 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1729 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1730 CodeGenFunction InitCGF(CGM);
1731 FunctionArgList ArgList;
1732 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1733 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1734 Loc, Loc);
1735 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1736 InitCGF.FinishFunction();
1737 return InitFunction;
1738 }
1739 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1740 }
1741 return nullptr;
1742}
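// Illustrative sketch (not from the original source): for a C++ threadprivate
// variable with a non-trivial constructor and destructor, e.g.
//   static S tp; #pragma omp threadprivate(tp)
// the code above emits roughly:
//   void *__kmpc_global_ctor_(void *p) { new (p) S(); return p; }
//   void __kmpc_global_dtor_(void *p) { ((S *)p)->~S(); }
//   void __omp_threadprivate_init_() {
//     __kmpc_global_thread_num(&loc);
//     __kmpc_threadprivate_register(&loc, &tp, ctor, /*cctor=*/0, dtor);
//   }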
1743
1744 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1745 llvm::GlobalValue *GV) {
1746 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1747 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1748
1749 // We only need to handle active 'indirect' declare target functions.
1750 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1751 return;
1752
1753 // Get a mangled name to store the new device global in.
1754 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1755 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1756 SmallString<128> Name;
1757 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1758
1759 // We need to generate a new global to hold the address of the indirectly
1760 // called device function. Doing this allows us to keep the visibility and
1761 // linkage of the associated function unchanged while allowing the runtime to
1762 // access its value.
1763 llvm::GlobalValue *Addr = GV;
1764 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1765 Addr = new llvm::GlobalVariable(
1766 CGM.getModule(), CGM.VoidPtrTy,
1767 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1768 nullptr, llvm::GlobalValue::NotThreadLocal,
1769 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1770 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1771 }
1772
1773 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1774 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1775 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1776 llvm::GlobalValue::WeakODRLinkage);
1777}
1778
1779 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1780 QualType VarType,
1781 StringRef Name) {
1782 std::string Suffix = getName({"artificial", ""});
1783 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1784 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1785 VarLVType, Twine(Name).concat(Suffix).str());
1786 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1787 CGM.getTarget().isTLSSupported()) {
1788 GAddr->setThreadLocal(/*Val=*/true);
1789 return Address(GAddr, GAddr->getValueType(),
1790 CGM.getContext().getTypeAlignInChars(VarType));
1791 }
1792 std::string CacheSuffix = getName({"cache", ""});
1793 llvm::Value *Args[] = {
1794 emitUpdateLocation(CGF, SourceLocation()),
1795 getThreadID(CGF, SourceLocation()),
1796 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1797 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1798 /*isSigned=*/false),
1799 OMPBuilder.getOrCreateInternalVariable(
1800 CGM.VoidPtrPtrTy,
1801 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1802 return Address(
1803 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1804 CGF.EmitRuntimeCall(
1805 OMPBuilder.getOrCreateRuntimeFunction(
1806 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1807 Args),
1808 CGF.Builder.getPtrTy(0)),
1809 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1810}
1811
1812 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1813 const RegionCodeGenTy &ThenGen,
1814 const RegionCodeGenTy &ElseGen) {
1815 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1816
1817 // If the condition constant folds and can be elided, try to avoid emitting
1818 // the condition and the dead arm of the if/else.
1819 bool CondConstant;
1820 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1821 if (CondConstant)
1822 ThenGen(CGF);
1823 else
1824 ElseGen(CGF);
1825 return;
1826 }
1827
1828 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1829 // emit the conditional branch.
1830 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1831 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1832 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1833 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1834
1835 // Emit the 'then' code.
1836 CGF.EmitBlock(ThenBlock);
1837 ThenGen(CGF);
1838 CGF.EmitBranch(ContBlock);
1839 // Emit the 'else' code if present.
1840 // There is no need to emit line number for unconditional branch.
1841 (void)ApplyDebugLocation::CreateEmpty(CGF);
1842 CGF.EmitBlock(ElseBlock);
1843 ElseGen(CGF);
1844 // There is no need to emit line number for unconditional branch.
1845 (void)ApplyDebugLocation::CreateEmpty(CGF);
1846 CGF.EmitBranch(ContBlock);
1847 // Emit the continuation block for code after the if.
1848 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1849}
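// Illustrative control flow (not from the original source): for a
// non-constant condition the helper above emits
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// with both arms falling through to %omp_if.end, while a condition that
// constant-folds runs only ThenGen or ElseGen with no branch at all.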
1850
1851 void CGOpenMPRuntime::emitParallelCall(
1852 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1853 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1854 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1855 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1856 if (!CGF.HaveInsertPoint())
1857 return;
1858 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1859 auto &M = CGM.getModule();
1860 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1861 this](CodeGenFunction &CGF, PrePostActionTy &) {
1862 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1863 llvm::Value *Args[] = {
1864 RTLoc,
1865 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1866 OutlinedFn};
1867 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1868 RealArgs.append(std::begin(Args), std::end(Args));
1869 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1870
1871 llvm::FunctionCallee RTLFn =
1872 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1873 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1874 };
1875 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1876 this](CodeGenFunction &CGF, PrePostActionTy &) {
1877 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1878 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1879 // Build calls:
1880 // __kmpc_serialized_parallel(&Loc, GTid);
1881 llvm::Value *Args[] = {RTLoc, ThreadID};
1882 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1883 M, OMPRTL___kmpc_serialized_parallel),
1884 Args);
1885
1886 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1887 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1888 RawAddress ZeroAddrBound =
1890 /*Name=*/".bound.zero.addr");
1891 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1892 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1893 // ThreadId for serialized parallels is 0.
1894 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1895 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1896 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1897
1898 // Ensure we do not inline the function. This is trivially true for the ones
1899 // passed to __kmpc_fork_call but the ones called in serialized regions
1900 // could be inlined. This is not perfect, but it is closer to the invariant
1901 // we want, namely, that every data environment starts with a new function.
1902 // TODO: We should pass the if condition to the runtime function and do the
1903 // handling there. Much cleaner code.
1904 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1905 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1906 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1907
1908 // __kmpc_end_serialized_parallel(&Loc, GTid);
1909 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1910 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1911 M, OMPRTL___kmpc_end_serialized_parallel),
1912 EndArgs);
1913 };
1914 if (IfCond) {
1915 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1916 } else {
1917 RegionCodeGenTy ThenRCG(ThenGen);
1918 ThenRCG(CGF);
1919 }
1920}
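// Illustrative sketch (not from the original source; `microtask` and `cond`
// are placeholder names): for `#pragma omp parallel if(cond)` the two arms
// built above correspond roughly to
//   if (cond) {
//     __kmpc_fork_call(&loc, nargs, microtask, ...captured vars...);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     microtask(&gtid, &.bound.zero.addr, ...captured vars...); // direct call
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }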
1921
1922// If we're inside an (outlined) parallel region, use the region info's
1923// thread-ID variable (it is passed as the first argument of the outlined
1924// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1925// region but in regular serial code, get the thread ID by calling kmp_int32
1926// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1927// temporary, and return the address of that temp.
1928 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1929 SourceLocation Loc) {
1930 if (auto *OMPRegionInfo =
1931 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1932 if (OMPRegionInfo->getThreadIDVariable())
1933 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1934
1935 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1936 QualType Int32Ty =
1937 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1938 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1939 CGF.EmitStoreOfScalar(ThreadID,
1940 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1941
1942 return ThreadIDTemp;
1943}
1944
1945llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1946 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1947 std::string Name = getName({Prefix, "var"});
1948 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1949}
1950
1951namespace {
1952/// Common pre(post)-action for different OpenMP constructs.
1953class CommonActionTy final : public PrePostActionTy {
1954 llvm::FunctionCallee EnterCallee;
1955 ArrayRef<llvm::Value *> EnterArgs;
1956 llvm::FunctionCallee ExitCallee;
1957 ArrayRef<llvm::Value *> ExitArgs;
1958 bool Conditional;
1959 llvm::BasicBlock *ContBlock = nullptr;
1960
1961public:
1962 CommonActionTy(llvm::FunctionCallee EnterCallee,
1963 ArrayRef<llvm::Value *> EnterArgs,
1964 llvm::FunctionCallee ExitCallee,
1965 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1966 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1967 ExitArgs(ExitArgs), Conditional(Conditional) {}
1968 void Enter(CodeGenFunction &CGF) override {
1969 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1970 if (Conditional) {
1971 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1972 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1973 ContBlock = CGF.createBasicBlock("omp_if.end");
1974 // Generate the branch (If-stmt)
1975 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1976 CGF.EmitBlock(ThenBlock);
1977 }
1978 }
1979 void Done(CodeGenFunction &CGF) {
1980 // Emit the rest of blocks/branches
1981 CGF.EmitBranch(ContBlock);
1982 CGF.EmitBlock(ContBlock, true);
1983 }
1984 void Exit(CodeGenFunction &CGF) override {
1985 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1986 }
1987};
1988} // anonymous namespace
1989
1990 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1991 StringRef CriticalName,
1992 const RegionCodeGenTy &CriticalOpGen,
1993 SourceLocation Loc, const Expr *Hint) {
1994 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1995 // CriticalOpGen();
1996 // __kmpc_end_critical(ident_t *, gtid, Lock);
1997 // Prepare arguments and build a call to __kmpc_critical
1998 if (!CGF.HaveInsertPoint())
1999 return;
2000 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2001 getCriticalRegionLock(CriticalName)};
2002 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2003 std::end(Args));
2004 if (Hint) {
2005 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2006 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2007 }
2008 CommonActionTy Action(
2009 OMPBuilder.getOrCreateRuntimeFunction(
2010 CGM.getModule(),
2011 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2012 EnterArgs,
2013 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2014 OMPRTL___kmpc_end_critical),
2015 Args);
2016 CriticalOpGen.setAction(Action);
2017 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2018}
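// Illustrative sketch (not from the original source): a directive such as
//   #pragma omp critical(name) hint(omp_sync_hint_contended)
// lowers roughly to
//   __kmpc_critical_with_hint(&loc, gtid, &.gomp_critical_user_name.var, hint);
//   ...body...
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_name.var);
// and a plain `critical` uses __kmpc_critical without the hint argument.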
2019
2020 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2021 const RegionCodeGenTy &MasterOpGen,
2022 SourceLocation Loc) {
2023 if (!CGF.HaveInsertPoint())
2024 return;
2025 // if(__kmpc_master(ident_t *, gtid)) {
2026 // MasterOpGen();
2027 // __kmpc_end_master(ident_t *, gtid);
2028 // }
2029 // Prepare arguments and build a call to __kmpc_master
2030 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2031 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2032 CGM.getModule(), OMPRTL___kmpc_master),
2033 Args,
2034 OMPBuilder.getOrCreateRuntimeFunction(
2035 CGM.getModule(), OMPRTL___kmpc_end_master),
2036 Args,
2037 /*Conditional=*/true);
2038 MasterOpGen.setAction(Action);
2039 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2040 Action.Done(CGF);
2041}
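// Illustrative sketch (not from the original source): the Conditional action
// above yields
//   if (__kmpc_master(&loc, gtid)) {
//     ...body...
//     __kmpc_end_master(&loc, gtid);
//   }
// so only the master thread executes the region, with no implied barrier.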
2042
2043 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2044 const RegionCodeGenTy &MaskedOpGen,
2045 SourceLocation Loc, const Expr *Filter) {
2046 if (!CGF.HaveInsertPoint())
2047 return;
2048 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2049 // MaskedOpGen();
2050 // __kmpc_end_masked(ident_t *, gtid);
2051 // }
2052 // Prepare arguments and build a call to __kmpc_masked
2053 llvm::Value *FilterVal = Filter
2054 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2055 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2056 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2057 FilterVal};
2058 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2059 getThreadID(CGF, Loc)};
2060 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2061 CGM.getModule(), OMPRTL___kmpc_masked),
2062 Args,
2063 OMPBuilder.getOrCreateRuntimeFunction(
2064 CGM.getModule(), OMPRTL___kmpc_end_masked),
2065 ArgsEnd,
2066 /*Conditional=*/true);
2067 MaskedOpGen.setAction(Action);
2068 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2069 Action.Done(CGF);
2070}
2071
2072 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2073 SourceLocation Loc) {
2074 if (!CGF.HaveInsertPoint())
2075 return;
2076 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2077 OMPBuilder.createTaskyield(CGF.Builder);
2078 } else {
2079 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2080 llvm::Value *Args[] = {
2081 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2082 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2083 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2084 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2085 Args);
2086 }
2087
2088 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2089 Region->emitUntiedSwitch(CGF);
2090}
2091
2092 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2093 const RegionCodeGenTy &TaskgroupOpGen,
2094 SourceLocation Loc) {
2095 if (!CGF.HaveInsertPoint())
2096 return;
2097 // __kmpc_taskgroup(ident_t *, gtid);
2098 // TaskgroupOpGen();
2099 // __kmpc_end_taskgroup(ident_t *, gtid);
2100 // Prepare arguments and build a call to __kmpc_taskgroup
2101 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2102 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2103 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2104 Args,
2105 OMPBuilder.getOrCreateRuntimeFunction(
2106 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2107 Args);
2108 TaskgroupOpGen.setAction(Action);
2109 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2110}
2111
2112/// Given an array of pointers to variables, project the address of a
2113/// given variable.
2114 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2115 unsigned Index, const VarDecl *Var) {
2116 // Pull out the pointer to the variable.
2117 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2118 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2119
2120 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2121 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2122}
2123
2124 static llvm::Value *emitCopyprivateCopyFunction(
2125 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2126 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2127 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2128 SourceLocation Loc) {
2129 ASTContext &C = CGM.getContext();
2130 // void copy_func(void *LHSArg, void *RHSArg);
2131 FunctionArgList Args;
2132 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2133 ImplicitParamKind::Other);
2134 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2135 ImplicitParamKind::Other);
2136 Args.push_back(&LHSArg);
2137 Args.push_back(&RHSArg);
2138 const auto &CGFI =
2139 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2140 std::string Name =
2141 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2142 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2143 llvm::GlobalValue::InternalLinkage, Name,
2144 &CGM.getModule());
2145 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2146 Fn->setDoesNotRecurse();
2147 CodeGenFunction CGF(CGM);
2148 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2149 // Dest = (void*[n])(LHSArg);
2150 // Src = (void*[n])(RHSArg);
2151 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2152 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2153 CGF.Builder.getPtrTy(0)),
2154 ArgsElemType, CGF.getPointerAlign());
2155 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2156 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2157 CGF.Builder.getPtrTy(0)),
2158 ArgsElemType, CGF.getPointerAlign());
2159 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2160 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2161 // ...
2162 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2163 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2164 const auto *DestVar =
2165 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2166 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2167
2168 const auto *SrcVar =
2169 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2170 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2171
2172 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2173 QualType Type = VD->getType();
2174 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2175 }
2176 CGF.FinishFunction();
2177 return Fn;
2178}
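// Illustrative shape of the generated helper (not from the original source),
// assuming two copyprivate variables `a` (int) and `b` (double):
//   void .omp.copyprivate.copy_func(void *dst, void *src) {
//     *(int *)((void **)dst)[0] = *(int *)((void **)src)[0];
//     *(double *)((void **)dst)[1] = *(double *)((void **)src)[1];
//   }
// Class types are copied through their copy-assignment via EmitOMPCopy.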
2179
2180 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2181 const RegionCodeGenTy &SingleOpGen,
2182 SourceLocation Loc,
2183 ArrayRef<const Expr *> CopyprivateVars,
2184 ArrayRef<const Expr *> SrcExprs,
2185 ArrayRef<const Expr *> DstExprs,
2186 ArrayRef<const Expr *> AssignmentOps) {
2187 if (!CGF.HaveInsertPoint())
2188 return;
2189 assert(CopyprivateVars.size() == SrcExprs.size() &&
2190 CopyprivateVars.size() == DstExprs.size() &&
2191 CopyprivateVars.size() == AssignmentOps.size());
2192 ASTContext &C = CGM.getContext();
2193 // int32 did_it = 0;
2194 // if(__kmpc_single(ident_t *, gtid)) {
2195 // SingleOpGen();
2196 // __kmpc_end_single(ident_t *, gtid);
2197 // did_it = 1;
2198 // }
2199 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2200 // <copy_func>, did_it);
2201
2202 Address DidIt = Address::invalid();
2203 if (!CopyprivateVars.empty()) {
2204 // int32 did_it = 0;
2205 QualType KmpInt32Ty =
2206 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2207 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2208 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2209 }
2210 // Prepare arguments and build a call to __kmpc_single
2211 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2212 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2213 CGM.getModule(), OMPRTL___kmpc_single),
2214 Args,
2215 OMPBuilder.getOrCreateRuntimeFunction(
2216 CGM.getModule(), OMPRTL___kmpc_end_single),
2217 Args,
2218 /*Conditional=*/true);
2219 SingleOpGen.setAction(Action);
2220 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2221 if (DidIt.isValid()) {
2222 // did_it = 1;
2223 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2224 }
2225 Action.Done(CGF);
2226 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2227 // <copy_func>, did_it);
2228 if (DidIt.isValid()) {
2229 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2230 QualType CopyprivateArrayTy = C.getConstantArrayType(
2231 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2232 /*IndexTypeQuals=*/0);
2233 // Create a list of all private variables for copyprivate.
2234 Address CopyprivateList =
2235 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2236 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2237 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2238 CGF.Builder.CreateStore(
2239 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2240 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2241 CGF.VoidPtrTy),
2242 Elem);
2243 }
2244 // Build the function that copies private values from the single region to
2245 // all other threads in the corresponding parallel region.
2246 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2247 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2248 SrcExprs, DstExprs, AssignmentOps, Loc);
2249 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2250 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2251 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2252 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2253 llvm::Value *Args[] = {
2254 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2255 getThreadID(CGF, Loc), // i32 <gtid>
2256 BufSize, // size_t <buf_size>
2257 CL.emitRawPointer(CGF), // void *<copyprivate list>
2258 CpyFn, // void (*) (void *, void *) <copy_func>
2259 DidItVal // i32 did_it
2260 };
2261 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2262 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2263 Args);
2264 }
2265}
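// Illustrative sketch (not from the original source): for
//   #pragma omp single copyprivate(x)
// the sequence built above is roughly
//   did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     ...body...; __kmpc_end_single(&loc, gtid); did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, <buf_size>, &list, copy_func, did_it);
// so the one executing thread broadcasts its value of x to all others.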
2266
2267 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2268 const RegionCodeGenTy &OrderedOpGen,
2269 SourceLocation Loc, bool IsThreads) {
2270 if (!CGF.HaveInsertPoint())
2271 return;
2272 // __kmpc_ordered(ident_t *, gtid);
2273 // OrderedOpGen();
2274 // __kmpc_end_ordered(ident_t *, gtid);
2275 // Prepare arguments and build a call to __kmpc_ordered
2276 if (IsThreads) {
2277 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2278 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2279 CGM.getModule(), OMPRTL___kmpc_ordered),
2280 Args,
2281 OMPBuilder.getOrCreateRuntimeFunction(
2282 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2283 Args);
2284 OrderedOpGen.setAction(Action);
2285 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2286 return;
2287 }
2288 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2289}
2290
2291 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2292 unsigned Flags;
2293 if (Kind == OMPD_for)
2294 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2295 else if (Kind == OMPD_sections)
2296 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2297 else if (Kind == OMPD_single)
2298 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2299 else if (Kind == OMPD_barrier)
2300 Flags = OMP_IDENT_BARRIER_EXPL;
2301 else
2302 Flags = OMP_IDENT_BARRIER_IMPL;
2303 return Flags;
2304}
2305
2306 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2307 CodeGenFunction &CGF, const OMPLoopDirective &S,
2308 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2309 // Check if the loop directive is actually a doacross loop directive. In this
2310 // case, choose schedule(static, 1).
2311 if (llvm::any_of(
2312 S.getClausesOfKind<OMPOrderedClause>(),
2313 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2314 ScheduleKind = OMPC_SCHEDULE_static;
2315 // Chunk size is 1 in this case.
2316 llvm::APInt ChunkSize(32, 1);
2317 ChunkExpr = IntegerLiteral::Create(
2318 CGF.getContext(), ChunkSize,
2319 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2320 SourceLocation());
2321 }
2322}
2323
2324 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2325 OpenMPDirectiveKind Kind, bool EmitChecks,
2326 bool ForceSimpleCall) {
2327 // Check if we should use the OMPBuilder
2328 auto *OMPRegionInfo =
2329 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2330 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2331 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2332 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2333 EmitChecks));
2334 CGF.Builder.restoreIP(AfterIP);
2335 return;
2336 }
2337
2338 if (!CGF.HaveInsertPoint())
2339 return;
2340 // Build call __kmpc_cancel_barrier(loc, thread_id);
2341 // Build call __kmpc_barrier(loc, thread_id);
2342 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2343 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2344 // thread_id);
2345 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2346 getThreadID(CGF, Loc)};
2347 if (OMPRegionInfo) {
2348 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2349 llvm::Value *Result = CGF.EmitRuntimeCall(
2350 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2351 OMPRTL___kmpc_cancel_barrier),
2352 Args);
2353 if (EmitChecks) {
2354 // if (__kmpc_cancel_barrier()) {
2355 // exit from construct;
2356 // }
2357 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2358 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2359 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2360 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2361 CGF.EmitBlock(ExitBB);
2362 // exit from construct;
2363 CodeGenFunction::JumpDest CancelDestination =
2364 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2365 CGF.EmitBranchThroughCleanup(CancelDestination);
2366 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2367 }
2368 return;
2369 }
2370 }
2371 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2372 CGM.getModule(), OMPRTL___kmpc_barrier),
2373 Args);
2374}
2375
2376 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2377 Expr *ME, bool IsFatal) {
2378 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2379 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2380 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2381 // *message)
2382 llvm::Value *Args[] = {
2383 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2384 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2385 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2386 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2387 CGM.getModule(), OMPRTL___kmpc_error),
2388 Args);
2389}
2390
2391/// Map the OpenMP loop schedule to the runtime enumeration.
2392static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2393 bool Chunked, bool Ordered) {
2394 switch (ScheduleKind) {
2395 case OMPC_SCHEDULE_static:
2396 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2397 : (Ordered ? OMP_ord_static : OMP_sch_static);
2398 case OMPC_SCHEDULE_dynamic:
2399 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2400 case OMPC_SCHEDULE_guided:
2401 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2402 case OMPC_SCHEDULE_runtime:
2403 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2404 case OMPC_SCHEDULE_auto:
2405 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2406 case OMPC_SCHEDULE_unknown:
2407 assert(!Chunked && "chunk was specified but schedule kind not known");
2408 return Ordered ? OMP_ord_static : OMP_sch_static;
2409 }
2410 llvm_unreachable("Unexpected runtime schedule");
2411}
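// Illustrative examples of this mapping (not from the original source):
//   schedule(static)           -> OMP_sch_static
//   schedule(static, 4)        -> OMP_sch_static_chunked
//   schedule(dynamic [, 4])    -> OMP_sch_dynamic_chunked
//   schedule(guided) + ordered -> OMP_ord_guided_chunked
//   schedule(runtime)          -> OMP_sch_runtime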
2412
2413/// Map the OpenMP distribute schedule to the runtime enumeration.
2414static OpenMPSchedType
2415 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2416 // only static is allowed for dist_schedule
2417 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2418}
2419
2420 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2421 bool Chunked) const {
2422 OpenMPSchedType Schedule =
2423 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2424 return Schedule == OMP_sch_static;
2425}
2426
2427 bool CGOpenMPRuntime::isStaticNonchunked(
2428 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2429 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2430 return Schedule == OMP_dist_sch_static;
2431}
2432
2433 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2434 bool Chunked) const {
2435 OpenMPSchedType Schedule =
2436 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2437 return Schedule == OMP_sch_static_chunked;
2438}
2439
2440 bool CGOpenMPRuntime::isStaticChunked(
2441 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2442 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2443 return Schedule == OMP_dist_sch_static_chunked;
2444}
2445
2446 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2447 OpenMPSchedType Schedule =
2448 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2449 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2450 return Schedule != OMP_sch_static;
2451}
2452
2453static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2454 OpenMPScheduleClauseModifier M1,
2455 OpenMPScheduleClauseModifier M2) {
2456 int Modifier = 0;
2457 switch (M1) {
2458 case OMPC_SCHEDULE_MODIFIER_monotonic:
2459 Modifier = OMP_sch_modifier_monotonic;
2460 break;
2461 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2462 Modifier = OMP_sch_modifier_nonmonotonic;
2463 break;
2464 case OMPC_SCHEDULE_MODIFIER_simd:
2465 if (Schedule == OMP_sch_static_chunked)
2466 Schedule = OMP_sch_static_balanced_chunked;
2467 break;
2468 case OMPC_SCHEDULE_MODIFIER_last:
2469 case OMPC_SCHEDULE_MODIFIER_unknown:
2470 break;
2471 }
2472 switch (M2) {
2473 case OMPC_SCHEDULE_MODIFIER_monotonic:
2474 Modifier = OMP_sch_modifier_monotonic;
2475 break;
2476 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2477 Modifier = OMP_sch_modifier_nonmonotonic;
2478 break;
2479 case OMPC_SCHEDULE_MODIFIER_simd:
2480 if (Schedule == OMP_sch_static_chunked)
2481 Schedule = OMP_sch_static_balanced_chunked;
2482 break;
2483 case OMPC_SCHEDULE_MODIFIER_last:
2484 case OMPC_SCHEDULE_MODIFIER_unknown:
2485 break;
2486 }
2487 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2488 // If the static schedule kind is specified or if the ordered clause is
2489 // specified, and if the nonmonotonic modifier is not specified, the effect is
2490 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2491 // modifier is specified, the effect is as if the nonmonotonic modifier is
2492 // specified.
2493 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2494 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2495 Schedule == OMP_sch_static_balanced_chunked ||
2496 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2497 Schedule == OMP_dist_sch_static_chunked ||
2498 Schedule == OMP_dist_sch_static))
2499 Modifier = OMP_sch_modifier_nonmonotonic;
2500 }
2501 return Schedule | Modifier;
2502}
2503
2504 void CGOpenMPRuntime::emitForDispatchInit(
2505 CodeGenFunction &CGF, SourceLocation Loc,
2506 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2507 bool Ordered, const DispatchRTInput &DispatchValues) {
2508 if (!CGF.HaveInsertPoint())
2509 return;
2510 OpenMPSchedType Schedule = getRuntimeSchedule(
2511 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2512 assert(Ordered ||
2513 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2514 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2515 Schedule != OMP_sch_static_balanced_chunked));
2516 // Call __kmpc_dispatch_init(
2517 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2518 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2519 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2520
2521 // If the Chunk was not specified in the clause - use default value 1.
2522 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2523 : CGF.Builder.getIntN(IVSize, 1);
2524 llvm::Value *Args[] = {
2525 emitUpdateLocation(CGF, Loc),
2526 getThreadID(CGF, Loc),
2527 CGF.Builder.getInt32(addMonoNonMonoModifier(
2528 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2529 DispatchValues.LB, // Lower
2530 DispatchValues.UB, // Upper
2531 CGF.Builder.getIntN(IVSize, 1), // Stride
2532 Chunk // Chunk
2533 };
2534 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2535 Args);
2536}
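// Illustrative driver pattern (not from the original source): a dynamically
// scheduled loop pairs this init with a dispatch_next polling loop, roughly
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, /*stride=*/1, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &st))
//     for (i = lo; i <= hi; ++i) body(i);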
2537
2538 void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2539 SourceLocation Loc) {
2540 if (!CGF.HaveInsertPoint())
2541 return;
2542 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2543 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2544 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2545}
2546
2547 static void emitForStaticInitCall(
2548 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2549 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2550 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2551 const CGOpenMPRuntime::StaticRTInput &Values) {
2552 if (!CGF.HaveInsertPoint())
2553 return;
2554
2555 assert(!Values.Ordered);
2556 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2557 Schedule == OMP_sch_static_balanced_chunked ||
2558 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2559 Schedule == OMP_dist_sch_static ||
2560 Schedule == OMP_dist_sch_static_chunked);
2561
2562 // Call __kmpc_for_static_init(
2563 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2564 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2565 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2566 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2567 llvm::Value *Chunk = Values.Chunk;
2568 if (Chunk == nullptr) {
2569 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2570 Schedule == OMP_dist_sch_static) &&
2571 "expected static non-chunked schedule");
2572 // If the Chunk was not specified in the clause - use default value 1.
2573 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2574 } else {
2575 assert((Schedule == OMP_sch_static_chunked ||
2576 Schedule == OMP_sch_static_balanced_chunked ||
2577 Schedule == OMP_ord_static_chunked ||
2578 Schedule == OMP_dist_sch_static_chunked) &&
2579 "expected static chunked schedule");
2580 }
2581 llvm::Value *Args[] = {
2582 UpdateLocation,
2583 ThreadId,
2584 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2585 M2)), // Schedule type
2586 Values.IL.emitRawPointer(CGF), // &isLastIter
2587 Values.LB.emitRawPointer(CGF), // &LB
2588 Values.UB.emitRawPointer(CGF), // &UB
2589 Values.ST.emitRawPointer(CGF), // &Stride
2590 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2591 Chunk // Chunk
2592 };
2593 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2594}
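// Illustrative pattern (not from the original source): a statically scheduled
// worksharing loop brackets its body with this init call and the matching
// fini, roughly
//   __kmpc_for_static_init_4(&loc, gtid, sched, &last, &lb, &ub, &st, 1, chunk);
//   for (i = lb; i <= ub; ++i) body(i);
//   __kmpc_for_static_fini(&loc, gtid);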
2595
2596 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2597 SourceLocation Loc,
2598 OpenMPDirectiveKind DKind,
2599 const OpenMPScheduleTy &ScheduleKind,
2600 const StaticRTInput &Values) {
2601 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2602 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2603 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2604 "Expected loop-based or sections-based directive.");
2605 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2606 isOpenMPLoopDirective(DKind)
2607 ? OMP_IDENT_WORK_LOOP
2608 : OMP_IDENT_WORK_SECTIONS);
2609 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2610 llvm::FunctionCallee StaticInitFunction =
2611 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2612 false);
2613 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2614 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2615 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2616}
2617
2618 void CGOpenMPRuntime::emitDistributeStaticInit(
2619 CodeGenFunction &CGF, SourceLocation Loc,
2620 OpenMPDistScheduleClauseKind SchedKind,
2621 const CGOpenMPRuntime::StaticRTInput &Values) {
2622 OpenMPSchedType ScheduleNum =
2623 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2624 llvm::Value *UpdatedLocation =
2625 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2626 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2627 llvm::FunctionCallee StaticInitFunction;
2628 bool isGPUDistribute =
2629 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2630 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2631 Values.IVSize, Values.IVSigned, isGPUDistribute);
2632
2633 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2634 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2635 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2636}
2637
2638 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2639 SourceLocation Loc,
2640 OpenMPDirectiveKind DKind) {
2641 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2642 DKind == OMPD_sections) &&
2643 "Expected distribute, for, or sections directive kind");
2644 if (!CGF.HaveInsertPoint())
2645 return;
2646 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2647 llvm::Value *Args[] = {
2648 emitUpdateLocation(CGF, Loc,
2649 isOpenMPDistributeDirective(DKind) ||
2650 (DKind == OMPD_target_teams_loop)
2651 ? OMP_IDENT_WORK_DISTRIBUTE
2652 : isOpenMPLoopDirective(DKind)
2653 ? OMP_IDENT_WORK_LOOP
2654 : OMP_IDENT_WORK_SECTIONS),
2655 getThreadID(CGF, Loc)};
2656 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2657 if (isOpenMPDistributeDirective(DKind) &&
2658 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2659 CGF.EmitRuntimeCall(
2660 OMPBuilder.getOrCreateRuntimeFunction(
2661 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2662 Args);
2663 else
2664 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2665 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2666 Args);
2667}
2668
2669 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2670 SourceLocation Loc,
2671 unsigned IVSize,
2672 bool IVSigned) {
2673 if (!CGF.HaveInsertPoint())
2674 return;
2675 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2676 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2677 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2678 Args);
2679}
2680
2681 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2682 SourceLocation Loc, unsigned IVSize,
2683 bool IVSigned, Address IL,
2684 Address LB, Address UB,
2685 Address ST) {
2686 // Call __kmpc_dispatch_next(
2687 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2688 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2689 // kmp_int[32|64] *p_stride);
2690 llvm::Value *Args[] = {
2691 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2692 IL.emitRawPointer(CGF), // &isLastIter
2693 LB.emitRawPointer(CGF), // &Lower
2694 UB.emitRawPointer(CGF), // &Upper
2695 ST.emitRawPointer(CGF) // &Stride
2696 };
2697 llvm::Value *Call = CGF.EmitRuntimeCall(
2698 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2699 return CGF.EmitScalarConversion(
2700 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2701 CGF.getContext().BoolTy, Loc);
2702}
2703
2704 llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2705 const Expr *Message,
2706 SourceLocation Loc) {
2707 if (!Message)
2708 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2709 return CGF.EmitScalarExpr(Message);
2710}
2711
2712llvm::Value *
2713 CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2714 const OMPMessageClause *MessageClause) {
2715 return emitMessageClause(
2716 CGF, MessageClause ? MessageClause->getMessageString() : nullptr,
2717 MessageClause ? MessageClause->getBeginLoc() : SourceLocation());
2718}
2719
2720llvm::Value *
2721 CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity,
2722 SourceLocation Loc) {
2723 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2724 // as if sev-level is fatal."
2725 return llvm::ConstantInt::get(CGM.Int32Ty,
2726 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2727}
2728
2729llvm::Value *
2730 CGOpenMPRuntime::emitSeverityClause(const OMPSeverityClause *SeverityClause) {
2731 return emitSeverityClause(
2732 SeverityClause ? SeverityClause->getSeverityKind() : OMPC_SEVERITY_unknown,
2733 SeverityClause ? SeverityClause->getBeginLoc() : SourceLocation());
2734}
2735
2736 void CGOpenMPRuntime::emitNumThreadsClause(
2737 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2738 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
2739 SourceLocation SeverityLoc, const Expr *Message,
2740 SourceLocation MessageLoc) {
2741 if (!CGF.HaveInsertPoint())
2742 return;
2743 llvm::SmallVector<llvm::Value *, 4> Args(
2744 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2745 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2746 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2747 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2748 // message) if the strict modifier is used.
2749 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2750 if (Modifier == OMPC_NUMTHREADS_strict) {
2751 FnID = OMPRTL___kmpc_push_num_threads_strict;
2752 Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2753 Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
2754 }
2755 CGF.EmitRuntimeCall(
2756 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2757}
2758
2759 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2760 ProcBindKind ProcBind,
2761 SourceLocation Loc) {
2762 if (!CGF.HaveInsertPoint())
2763 return;
2764 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2765 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2766 llvm::Value *Args[] = {
2767 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2768 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2769 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2770 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2771 Args);
2772}
2773
2774 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2775 SourceLocation Loc, llvm::AtomicOrdering AO) {
2776 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2777 OMPBuilder.createFlush(CGF.Builder);
2778 } else {
2779 if (!CGF.HaveInsertPoint())
2780 return;
2781 // Build call void __kmpc_flush(ident_t *loc)
2782 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2783 CGM.getModule(), OMPRTL___kmpc_flush),
2784 emitUpdateLocation(CGF, Loc));
2785 }
2786}
2787
2788namespace {
2789/// Indexes of fields for type kmp_task_t.
2790enum KmpTaskTFields {
2791 /// List of shared variables.
2792 KmpTaskTShareds,
2793 /// Task routine.
2794 KmpTaskTRoutine,
2795 /// Partition id for the untied tasks.
2796 KmpTaskTPartId,
2797 /// Function with call of destructors for private variables.
2798 Data1,
2799 /// Task priority.
2800 Data2,
2801 /// (Taskloops only) Lower bound.
2802 KmpTaskTLowerBound,
2803 /// (Taskloops only) Upper bound.
2804 KmpTaskTUpperBound,
2805 /// (Taskloops only) Stride.
2806 KmpTaskTStride,
2807 /// (Taskloops only) Is last iteration flag.
2808 KmpTaskTLastIter,
2809 /// (Taskloops only) Reduction data.
2810 KmpTaskTReductions,
2811};
2812} // anonymous namespace
2813
2814 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2815 // If we are in simd mode or there are no entries, we don't need to do
2816 // anything.
2817 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2818 return;
2819
2820 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2821 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2822 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2823 SourceLocation Loc;
2824 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2825 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2826 E = CGM.getContext().getSourceManager().fileinfo_end();
2827 I != E; ++I) {
2828 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2829 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2830 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2831 I->getFirst(), EntryInfo.Line, 1);
2832 break;
2833 }
2834 }
2835 }
2836 switch (Kind) {
2837 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2838 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2839 DiagnosticsEngine::Error, "Offloading entry for target region in "
2840 "%0 is incorrect: either the "
2841 "address or the ID is invalid.");
2842 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2843 } break;
2844 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2845 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2846 DiagnosticsEngine::Error, "Offloading entry for declare target "
2847 "variable %0 is incorrect: the "
2848 "address is invalid.");
2849 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2850 } break;
2851 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2852 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2854 "Offloading entry for declare target variable is incorrect: the "
2855 "address is invalid.");
2856 CGM.getDiags().Report(DiagID);
2857 } break;
2858 }
2859 };
2860
2861 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2862}
2863
2864 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2865 if (!KmpRoutineEntryPtrTy) {
2866 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2867 ASTContext &C = CGM.getContext();
2868 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2869 FunctionProtoType::ExtProtoInfo EPI;
2870 KmpRoutineEntryPtrQTy = C.getPointerType(
2871 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2872 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2873 }
2874}
2875
2876namespace {
2877struct PrivateHelpersTy {
2878 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2879 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2880 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2881 PrivateElemInit(PrivateElemInit) {}
2882 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2883 const Expr *OriginalRef = nullptr;
2884 const VarDecl *Original = nullptr;
2885 const VarDecl *PrivateCopy = nullptr;
2886 const VarDecl *PrivateElemInit = nullptr;
2887 bool isLocalPrivate() const {
2888 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2889 }
2890};
2891typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2892} // anonymous namespace
2893
2894static bool isAllocatableDecl(const VarDecl *VD) {
2895 const VarDecl *CVD = VD->getCanonicalDecl();
2896 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2897 return false;
2898 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2899 // Use the default allocation.
2900 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2901 !AA->getAllocator());
2902}
2903
2904static RecordDecl *
2905 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2906 if (!Privates.empty()) {
2907 ASTContext &C = CGM.getContext();
2908 // Build struct .kmp_privates_t. {
2909 // /* private vars */
2910 // };
2911 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2912 RD->startDefinition();
2913 for (const auto &Pair : Privates) {
2914 const VarDecl *VD = Pair.second.Original;
2915 QualType Type = VD->getType().getNonReferenceType();
2916 // If the private variable is a local variable with lvalue ref type,
2917 // allocate the pointer instead of the pointee type.
2918 if (Pair.second.isLocalPrivate()) {
2919 if (VD->getType()->isLValueReferenceType())
2920 Type = C.getPointerType(Type);
2921 if (isAllocatableDecl(VD))
2922 Type = C.getPointerType(Type);
2923 }
2924 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2925 if (VD->hasAttrs()) {
2926 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2927 E(VD->getAttrs().end());
2928 I != E; ++I)
2929 FD->addAttr(*I);
2930 }
2931 }
2932 RD->completeDefinition();
2933 return RD;
2934 }
2935 return nullptr;
2936}
2937
2938static RecordDecl *
2939 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2940 QualType KmpInt32Ty,
2941 QualType KmpRoutineEntryPointerQTy) {
2942 ASTContext &C = CGM.getContext();
2943 // Build struct kmp_task_t {
2944 // void * shareds;
2945 // kmp_routine_entry_t routine;
2946 // kmp_int32 part_id;
2947 // kmp_cmplrdata_t data1;
2948 // kmp_cmplrdata_t data2;
2949 // For taskloops additional fields:
2950 // kmp_uint64 lb;
2951 // kmp_uint64 ub;
2952 // kmp_int64 st;
2953 // kmp_int32 liter;
2954 // void * reductions;
2955 // };
2956 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2957 UD->startDefinition();
2958 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2959 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2960 UD->completeDefinition();
2961 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
2962 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2963 RD->startDefinition();
2964 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2965 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2966 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2967 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2968 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2969 if (isOpenMPTaskLoopDirective(Kind)) {
2970 QualType KmpUInt64Ty =
2971 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2972 QualType KmpInt64Ty =
2973 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2974 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2975 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2976 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2977 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2978 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2979 }
2980 RD->completeDefinition();
2981 return RD;
2982}
2983
2984static RecordDecl *
2985 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2986 ArrayRef<PrivateDataTy> Privates) {
2987 ASTContext &C = CGM.getContext();
2988 // Build struct kmp_task_t_with_privates {
2989 // kmp_task_t task_data;
2990 // .kmp_privates_t. privates;
2991 // };
2992 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2993 RD->startDefinition();
2994 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2995 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2996 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
2997 RD->completeDefinition();
2998 return RD;
2999}
3000
3001/// Emit a proxy function which accepts kmp_task_t as the second
3002/// argument.
3003/// \code
3004/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3005/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3006/// For taskloops:
3007/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3008/// tt->reductions, tt->shareds);
3009/// return 0;
3010/// }
3011/// \endcode
3012static llvm::Function *
3013 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3014 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3015 QualType KmpTaskTWithPrivatesPtrQTy,
3016 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3017 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3018 llvm::Value *TaskPrivatesMap) {
3019 ASTContext &C = CGM.getContext();
3020 FunctionArgList Args;
3021 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3022 ImplicitParamKind::Other);
3023 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3024 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3025 ImplicitParamKind::Other);
3026 Args.push_back(&GtidArg);
3027 Args.push_back(&TaskTypeArg);
3028 const auto &TaskEntryFnInfo =
3029 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3030 llvm::FunctionType *TaskEntryTy =
3031 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3032 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3033 auto *TaskEntry = llvm::Function::Create(
3034 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3035 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3036 TaskEntry->setDoesNotRecurse();
3037 CodeGenFunction CGF(CGM);
3038 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3039 Loc, Loc);
3040
3041 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3042 // tt,
3043 // For taskloops:
3044 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3045 // tt->task_data.shareds);
3046 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3047 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3048 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3049 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3050 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3051 const auto *KmpTaskTWithPrivatesQTyRD =
3052 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3053 LValue Base =
3054 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3055 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3056 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3057 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3058 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3059
3060 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3061 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3062 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3063 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3064 CGF.ConvertTypeForMem(SharedsPtrTy));
3065
3066 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3067 llvm::Value *PrivatesParam;
3068 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3069 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3070 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3071 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3072 } else {
3073 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3074 }
3075
3076 llvm::Value *CommonArgs[] = {
3077 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3078 CGF.Builder
3079 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3080 CGF.VoidPtrTy, CGF.Int8Ty)
3081 .emitRawPointer(CGF)};
3082 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3083 std::end(CommonArgs));
3084 if (isOpenMPTaskLoopDirective(Kind)) {
3085 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3086 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3087 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3088 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3089 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3090 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3091 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3092 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3093 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3094 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3095 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3096 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3097 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3098 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3099 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3100 CallArgs.push_back(LBParam);
3101 CallArgs.push_back(UBParam);
3102 CallArgs.push_back(StParam);
3103 CallArgs.push_back(LIParam);
3104 CallArgs.push_back(RParam);
3105 }
3106 CallArgs.push_back(SharedsParam);
3107
3108 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3109 CallArgs);
3110 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3111 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3112 CGF.FinishFunction();
3113 return TaskEntry;
3114}
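// For a plain 'task', the proxy emitted above therefore calls
//   TaskFunction(gtid, &tt->task_data.part_id, privates-or-null,
//                task_privates_map, tt, tt->task_data.shareds);
// while taskloop variants insert lb, ub, st, liter and reductions before the
// shareds argument, mirroring the CallArgs construction above.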
3115
3116static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3117 SourceLocation Loc,
3118 QualType KmpInt32Ty,
3119 QualType KmpTaskTWithPrivatesPtrQTy,
3120 QualType KmpTaskTWithPrivatesQTy) {
3121 ASTContext &C = CGM.getContext();
3122 FunctionArgList Args;
3123 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3124 ImplicitParamKind::Other);
3125 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3126 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3127 ImplicitParamKind::Other);
3128 Args.push_back(&GtidArg);
3129 Args.push_back(&TaskTypeArg);
3130 const auto &DestructorFnInfo =
3131 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3132 llvm::FunctionType *DestructorFnTy =
3133 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3134 std::string Name =
3135 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3136 auto *DestructorFn =
3137 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3138 Name, &CGM.getModule());
3139 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3140 DestructorFnInfo);
3141 DestructorFn->setDoesNotRecurse();
3142 CodeGenFunction CGF(CGM);
3143 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3144 Args, Loc, Loc);
3145
3146 LValue Base = CGF.EmitLoadOfPointerLValue(
3147 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3148 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3149 const auto *KmpTaskTWithPrivatesQTyRD =
3150 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3151 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3152 Base = CGF.EmitLValueForField(Base, *FI);
3153 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3154 if (QualType::DestructionKind DtorKind =
3155 Field->getType().isDestructedType()) {
3156 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3157 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3158 }
3159 }
3160 CGF.FinishFunction();
3161 return DestructorFn;
3162}
3163
3164/// Emit a privates mapping function for correct handling of private and
3165/// firstprivate variables.
3166/// \code
3167/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3168/// **noalias priv1,..., <tyn> **noalias privn) {
3169/// *priv1 = &.privates.priv1;
3170/// ...;
3171/// *privn = &.privates.privn;
3172/// }
3173/// \endcode
3174static llvm::Value *
3175emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3176 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3177 ArrayRef<PrivateDataTy> Privates) {
3178 ASTContext &C = CGM.getContext();
3179 FunctionArgList Args;
3180 ImplicitParamDecl TaskPrivatesArg(
3181 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3182 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3183 ImplicitParamKind::Other);
3184 Args.push_back(&TaskPrivatesArg);
3185 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3186 unsigned Counter = 1;
3187 for (const Expr *E : Data.PrivateVars) {
3188 Args.push_back(ImplicitParamDecl::Create(
3189 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3190 C.getPointerType(C.getPointerType(E->getType()))
3191 .withConst()
3192 .withRestrict(),
3193 ImplicitParamKind::Other));
3194 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3195 PrivateVarsPos[VD] = Counter;
3196 ++Counter;
3197 }
3198 for (const Expr *E : Data.FirstprivateVars) {
3199 Args.push_back(ImplicitParamDecl::Create(
3200 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3201 C.getPointerType(C.getPointerType(E->getType()))
3202 .withConst()
3203 .withRestrict(),
3204 ImplicitParamKind::Other));
3205 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3206 PrivateVarsPos[VD] = Counter;
3207 ++Counter;
3208 }
3209 for (const Expr *E : Data.LastprivateVars) {
3210 Args.push_back(ImplicitParamDecl::Create(
3211 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3212 C.getPointerType(C.getPointerType(E->getType()))
3213 .withConst()
3214 .withRestrict(),
3215 ImplicitParamKind::Other));
3216 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3217 PrivateVarsPos[VD] = Counter;
3218 ++Counter;
3219 }
3220 for (const VarDecl *VD : Data.PrivateLocals) {
3221 QualType Ty = VD->getType().getNonReferenceType();
3222 if (VD->getType()->isLValueReferenceType())
3223 Ty = C.getPointerType(Ty);
3224 if (isAllocatableDecl(VD))
3225 Ty = C.getPointerType(Ty);
3226 Args.push_back(ImplicitParamDecl::Create(
3227 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3228 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3229 ImplicitParamKind::Other));
3230 PrivateVarsPos[VD] = Counter;
3231 ++Counter;
3232 }
3233 const auto &TaskPrivatesMapFnInfo =
3234 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3235 llvm::FunctionType *TaskPrivatesMapTy =
3236 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3237 std::string Name =
3238 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3239 auto *TaskPrivatesMap = llvm::Function::Create(
3240 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3241 &CGM.getModule());
3242 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3243 TaskPrivatesMapFnInfo);
3244 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3245 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3246 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3247 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3248 }
3249 CodeGenFunction CGF(CGM);
3250 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3251 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3252
3253 // *privi = &.privates.privi;
3254 LValue Base = CGF.EmitLoadOfPointerLValue(
3255 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3256 TaskPrivatesArg.getType()->castAs<PointerType>());
3257 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3258 Counter = 0;
3259 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3260 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3261 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3262 LValue RefLVal =
3263 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3264 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3265 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3266 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3267 ++Counter;
3268 }
3269 CGF.FinishFunction();
3270 return TaskPrivatesMap;
3271}
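// A concrete instance (a sketch, with illustrative names): for
// 'firstprivate(a, b)' the emitted mapper is effectively
//   void .omp_task_privates_map.(const .privates. *privs,
//                                int **a_p, int **b_p) {
//     *a_p = &privs->a;
//     *b_p = &privs->b;
//   }
// handing the task entry stable addresses of the privates in task memory.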
3272
3273/// Emit initialization for private variables in task-based directives.
3274static void emitPrivatesInit(CodeGenFunction &CGF,
3275 const OMPExecutableDirective &D,
3276 Address KmpTaskSharedsPtr, LValue TDBase,
3277 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3278 QualType SharedsTy, QualType SharedsPtrTy,
3279 const OMPTaskDataTy &Data,
3280 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3281 ASTContext &C = CGF.getContext();
3282 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3283 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3284 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3285 ? OMPD_taskloop
3286 : OMPD_task;
3287 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3288 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3289 LValue SrcBase;
3290 bool IsTargetTask =
3291 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3292 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3293  // For target-based directives, skip the 4 firstprivate arrays BasePointersArray,
3294 // PointersArray, SizesArray, and MappersArray. The original variables for
3295 // these arrays are not captured and we get their addresses explicitly.
3296 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3297 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3298 SrcBase = CGF.MakeAddrLValue(
3299 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3300 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3301 CGF.ConvertTypeForMem(SharedsTy)),
3302 SharedsTy);
3303 }
3304 FI = FI->getType()->castAsRecordDecl()->field_begin();
3305 for (const PrivateDataTy &Pair : Privates) {
3306 // Do not initialize private locals.
3307 if (Pair.second.isLocalPrivate()) {
3308 ++FI;
3309 continue;
3310 }
3311 const VarDecl *VD = Pair.second.PrivateCopy;
3312 const Expr *Init = VD->getAnyInitializer();
3313 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3314 !CGF.isTrivialInitializer(Init)))) {
3315 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3316 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3317 const VarDecl *OriginalVD = Pair.second.Original;
3318 // Check if the variable is the target-based BasePointersArray,
3319 // PointersArray, SizesArray, or MappersArray.
3320 LValue SharedRefLValue;
3321 QualType Type = PrivateLValue.getType();
3322 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3323 if (IsTargetTask && !SharedField) {
3324 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3325 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3326 cast<CapturedDecl>(OriginalVD->getDeclContext())
3327 ->getNumParams() == 0 &&
3328 isa<TranslationUnitDecl>(
3329 cast<CapturedDecl>(OriginalVD->getDeclContext())
3330 ->getDeclContext()) &&
3331 "Expected artificial target data variable.");
3332 SharedRefLValue =
3333 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3334 } else if (ForDup) {
3335 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3336 SharedRefLValue = CGF.MakeAddrLValue(
3337 SharedRefLValue.getAddress().withAlignment(
3338 C.getDeclAlign(OriginalVD)),
3339 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3340 SharedRefLValue.getTBAAInfo());
3341 } else if (CGF.LambdaCaptureFields.count(
3342 Pair.second.Original->getCanonicalDecl()) > 0 ||
3343 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3344 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3345 } else {
3346 // Processing for implicitly captured variables.
3347 InlinedOpenMPRegionRAII Region(
3348 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3349 /*HasCancel=*/false, /*NoInheritance=*/true);
3350 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3351 }
3352 if (Type->isArrayType()) {
3353 // Initialize firstprivate array.
3354 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3355 // Perform simple memcpy.
3356 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3357 } else {
3358 // Initialize firstprivate array using element-by-element
3359 // initialization.
3360 CGF.EmitOMPAggregateAssign(
3361 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3362 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3363 Address SrcElement) {
3364 // Clean up any temporaries needed by the initialization.
3365 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3366 InitScope.addPrivate(Elem, SrcElement);
3367 (void)InitScope.Privatize();
3368 // Emit initialization for single element.
3369 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3370 CGF, &CapturesInfo);
3371 CGF.EmitAnyExprToMem(Init, DestElement,
3372 Init->getType().getQualifiers(),
3373 /*IsInitializer=*/false);
3374 });
3375 }
3376 } else {
3377 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3378 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3379 (void)InitScope.Privatize();
3380 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3381 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3382 /*capturedByInit=*/false);
3383 }
3384 } else {
3385 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3386 }
3387 }
3388 ++FI;
3389 }
3390}
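// In summary, each non-local private above is initialized either from its
// stored initializer or, when a PrivateElemInit element is present
// (firstprivates), from the captured shared copy: a plain aggregate copy for
// trivially initializable arrays, an element-by-element loop otherwise.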
3391
3392/// Check if duplication function is required for taskloops.
3393static bool checkInitIsRequired(CodeGenFunction &CGF,
3394 ArrayRef<PrivateDataTy> Privates) {
3395 bool InitRequired = false;
3396 for (const PrivateDataTy &Pair : Privates) {
3397 if (Pair.second.isLocalPrivate())
3398 continue;
3399 const VarDecl *VD = Pair.second.PrivateCopy;
3400 const Expr *Init = VD->getAnyInitializer();
3401 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3402 !CGF.isTrivialInitializer(Init));
3403 if (InitRequired)
3404 break;
3405 }
3406 return InitRequired;
3407}
3408
3409
3410/// Emit task_dup function (for initialization of
3411/// private/firstprivate/lastprivate vars and last_iter flag)
3412/// \code
3413/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3414/// lastpriv) {
3415/// // setup lastprivate flag
3416/// task_dst->last = lastpriv;
3417/// // could be constructor calls here...
3418/// }
3419/// \endcode
3420static llvm::Value *
3421emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3422 const OMPExecutableDirective &D,
3423 QualType KmpTaskTWithPrivatesPtrQTy,
3424 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3425 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3426 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3427 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3428 ASTContext &C = CGM.getContext();
3429 FunctionArgList Args;
3430 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3431 KmpTaskTWithPrivatesPtrQTy,
3432 ImplicitParamKind::Other);
3433 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3434 KmpTaskTWithPrivatesPtrQTy,
3435 ImplicitParamKind::Other);
3436 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3437 ImplicitParamKind::Other);
3438 Args.push_back(&DstArg);
3439 Args.push_back(&SrcArg);
3440 Args.push_back(&LastprivArg);
3441 const auto &TaskDupFnInfo =
3442 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3443 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3444 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3445 auto *TaskDup = llvm::Function::Create(
3446 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3447 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3448 TaskDup->setDoesNotRecurse();
3449 CodeGenFunction CGF(CGM);
3450 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3451 Loc);
3452
3453 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3454 CGF.GetAddrOfLocalVar(&DstArg),
3455 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3456 // task_dst->liter = lastpriv;
3457 if (WithLastIter) {
3458 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3459 LValue Base = CGF.EmitLValueForField(
3460 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3461 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3462 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3463 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3464 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3465 }
3466
3467 // Emit initial values for private copies (if any).
3468 assert(!Privates.empty());
3469 Address KmpTaskSharedsPtr = Address::invalid();
3470 if (!Data.FirstprivateVars.empty()) {
3471 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3472 CGF.GetAddrOfLocalVar(&SrcArg),
3473 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3474 LValue Base = CGF.EmitLValueForField(
3475 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3476 KmpTaskSharedsPtr = Address(
3477 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3478 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3479 KmpTaskTShareds)),
3480 Loc),
3481 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3482 }
3483 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3484 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3485 CGF.FinishFunction();
3486 return TaskDup;
3487}
3488
3489/// Checks if destructor function is required to be generated.
3490/// \return true if cleanups are required, false otherwise.
3491static bool
3492checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3493 ArrayRef<PrivateDataTy> Privates) {
3494 for (const PrivateDataTy &P : Privates) {
3495 if (P.second.isLocalPrivate())
3496 continue;
3497 QualType Ty = P.second.Original->getType().getNonReferenceType();
3498 if (Ty.isDestructedType())
3499 return true;
3500 }
3501 return false;
3502}
3503
3504namespace {
3505/// Loop generator for OpenMP iterator expression.
3506class OMPIteratorGeneratorScope final
3507 : public CodeGenFunction::OMPPrivateScope {
3508 CodeGenFunction &CGF;
3509 const OMPIteratorExpr *E = nullptr;
3510 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3511 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3512 OMPIteratorGeneratorScope() = delete;
3513 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3514
3515public:
3516 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3517 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3518 if (!E)
3519 return;
3520 SmallVector<llvm::Value *, 4> Uppers;
3521 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3522 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3523 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3524 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3525 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3526 addPrivate(
3527 HelperData.CounterVD,
3528 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3529 }
3530 Privatize();
3531
3532 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3533 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3534 LValue CLVal =
3535 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3536 HelperData.CounterVD->getType());
3537 // Counter = 0;
3538 CGF.EmitStoreOfScalar(
3539 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3540 CLVal);
3541 CodeGenFunction::JumpDest &ContDest =
3542 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3543 CodeGenFunction::JumpDest &ExitDest =
3544 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3545 // N = <number-of_iterations>;
3546 llvm::Value *N = Uppers[I];
3547 // cont:
3548 // if (Counter < N) goto body; else goto exit;
3549 CGF.EmitBlock(ContDest.getBlock());
3550 auto *CVal =
3551 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3552 llvm::Value *Cmp =
3553 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3554 ? CGF.Builder.CreateICmpSLT(CVal, N)
3555 : CGF.Builder.CreateICmpULT(CVal, N);
3556 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3557 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3558 // body:
3559 CGF.EmitBlock(BodyBB);
3560 // Iteri = Begini + Counter * Stepi;
3561 CGF.EmitIgnoredExpr(HelperData.Update);
3562 }
3563 }
3564 ~OMPIteratorGeneratorScope() {
3565 if (!E)
3566 return;
3567 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3568 // Counter = Counter + 1;
3569 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3570 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3571 // goto cont;
3572 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3573 // exit:
3574 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3575 }
3576 }
3577};
3578} // namespace
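// For a clause such as 'depend(iterator(i = begin:end:step), ...)' the scope
// above emits, in effect (a sketch; N is the precomputed trip count from the
// helper's Upper expression):
//   counter = 0;
// cont: if (counter < N) goto body; else goto exit;
// body: i = begin + counter * step;   // HelperData.Update
//       <caller-emitted body>
//       counter = counter + 1;        // CounterUpdate, from the destructor
//       goto cont;
// exit:
// with one such loop per declared iterator, nested in declaration order.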
3579
3580static std::pair<llvm::Value *, llvm::Value *>
3581getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3582 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3583 llvm::Value *Addr;
3584 if (OASE) {
3585 const Expr *Base = OASE->getBase();
3586 Addr = CGF.EmitScalarExpr(Base);
3587 } else {
3588 Addr = CGF.EmitLValue(E).getPointer(CGF);
3589 }
3590 llvm::Value *SizeVal;
3591 QualType Ty = E->getType();
3592 if (OASE) {
3593 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3594 for (const Expr *SE : OASE->getDimensions()) {
3595 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3596 Sz = CGF.EmitScalarConversion(
3597 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3598 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3599 }
3600 } else if (const auto *ASE =
3601 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3602 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3603 Address UpAddrAddress = UpAddrLVal.getAddress();
3604 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3605 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3606 /*Idx0=*/1);
3607 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3608 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3609 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3610 } else {
3611 SizeVal = CGF.getTypeSize(Ty);
3612 }
3613 return std::make_pair(Addr, SizeVal);
3614}
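// Examples of the size computation above: for an array-shaping expression
// '([n][m])ptr' the size is sizeof(*ptr) * n * m; for an array section
// 'a[lo:len]' it is the byte distance from &a[lo] to one past the section's
// upper element; otherwise it is simply the size of the expression's type.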
3615
3616/// Builds kmp_task_affinity_info_t, if it is not built yet, and the flags type.
3617static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3618 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3619 if (KmpTaskAffinityInfoTy.isNull()) {
3620 RecordDecl *KmpAffinityInfoRD =
3621 C.buildImplicitRecord("kmp_task_affinity_info_t");
3622 KmpAffinityInfoRD->startDefinition();
3623 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3624 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3625 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3626 KmpAffinityInfoRD->completeDefinition();
3627 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3628 }
3629}
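// The record built above mirrors the runtime's kmp_task_affinity_info_t;
// roughly (a sketch, assuming a 64-bit target):
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t len;
//     uint32_t flags; // the unsigned 32-bit FlagsTy above
//   };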
3630
3631CGOpenMPRuntime::TaskResultTy
3632CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3633 const OMPExecutableDirective &D,
3634 llvm::Function *TaskFunction, QualType SharedsTy,
3635 Address Shareds, const OMPTaskDataTy &Data) {
3636 ASTContext &C = CGM.getContext();
3637 llvm::SmallVector<PrivateDataTy, 4> Privates;
3638  // Aggregate privates and sort them by alignment.
3639 const auto *I = Data.PrivateCopies.begin();
3640 for (const Expr *E : Data.PrivateVars) {
3641 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3642 Privates.emplace_back(
3643 C.getDeclAlign(VD),
3644 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3645 /*PrivateElemInit=*/nullptr));
3646 ++I;
3647 }
3648 I = Data.FirstprivateCopies.begin();
3649 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3650 for (const Expr *E : Data.FirstprivateVars) {
3651 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3652 Privates.emplace_back(
3653 C.getDeclAlign(VD),
3654 PrivateHelpersTy(
3655 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3656 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3657 ++I;
3658 ++IElemInitRef;
3659 }
3660 I = Data.LastprivateCopies.begin();
3661 for (const Expr *E : Data.LastprivateVars) {
3662 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3663 Privates.emplace_back(
3664 C.getDeclAlign(VD),
3665 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3666 /*PrivateElemInit=*/nullptr));
3667 ++I;
3668 }
3669 for (const VarDecl *VD : Data.PrivateLocals) {
3670 if (isAllocatableDecl(VD))
3671 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3672 else
3673 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3674 }
3675 llvm::stable_sort(Privates,
3676 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3677 return L.first > R.first;
3678 });
3679 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3680 // Build type kmp_routine_entry_t (if not built yet).
3681 emitKmpRoutineEntryT(KmpInt32Ty);
3682 // Build type kmp_task_t (if not built yet).
3683 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3684 if (SavedKmpTaskloopTQTy.isNull()) {
3685 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3686 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3687 }
3688 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3689 } else {
3690 assert((D.getDirectiveKind() == OMPD_task ||
3691 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3692 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3693 "Expected taskloop, task or target directive");
3694 if (SavedKmpTaskTQTy.isNull()) {
3695 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3696 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3697 }
3698 KmpTaskTQTy = SavedKmpTaskTQTy;
3699 }
3700 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3701 // Build particular struct kmp_task_t for the given task.
3702 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3703 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3704 CanQualType KmpTaskTWithPrivatesQTy =
3705 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3706 QualType KmpTaskTWithPrivatesPtrQTy =
3707 C.getPointerType(KmpTaskTWithPrivatesQTy);
3708 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3709 llvm::Value *KmpTaskTWithPrivatesTySize =
3710 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3711 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3712
3713 // Emit initial values for private copies (if any).
3714 llvm::Value *TaskPrivatesMap = nullptr;
3715 llvm::Type *TaskPrivatesMapTy =
3716 std::next(TaskFunction->arg_begin(), 3)->getType();
3717 if (!Privates.empty()) {
3718 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3719 TaskPrivatesMap =
3720 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3721 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3722 TaskPrivatesMap, TaskPrivatesMapTy);
3723 } else {
3724 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3725 cast<llvm::PointerType>(TaskPrivatesMapTy));
3726 }
3727 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3728 // kmp_task_t *tt);
3729 llvm::Function *TaskEntry = emitProxyTaskFunction(
3730 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3731 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3732 TaskPrivatesMap);
3733
3734 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3735 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3736 // kmp_routine_entry_t *task_entry);
3737 // Task flags. Format is taken from
3738 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3739 // description of kmp_tasking_flags struct.
3740 enum {
3741 TiedFlag = 0x1,
3742 FinalFlag = 0x2,
3743 DestructorsFlag = 0x8,
3744 PriorityFlag = 0x20,
3745 DetachableFlag = 0x40,
3746 };
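  // For example, a tied task with a priority clause is allocated with
  // TiedFlag | PriorityFlag (0x21); DestructorsFlag is OR'ed in below only
  // when some private actually needs cleanup.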
3747 unsigned Flags = Data.Tied ? TiedFlag : 0;
3748 bool NeedsCleanup = false;
3749 if (!Privates.empty()) {
3750 NeedsCleanup =
3751 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3752 if (NeedsCleanup)
3753 Flags = Flags | DestructorsFlag;
3754 }
3755 if (Data.Priority.getInt())
3756 Flags = Flags | PriorityFlag;
3757 if (D.hasClausesOfKind<OMPDetachClause>())
3758 Flags = Flags | DetachableFlag;
3759 llvm::Value *TaskFlags =
3760 Data.Final.getPointer()
3761 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3762 CGF.Builder.getInt32(FinalFlag),
3763 CGF.Builder.getInt32(/*C=*/0))
3764 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3765 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3766 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3767 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3768 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3769 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3770 TaskEntry, KmpRoutineEntryPtrTy)};
3771 llvm::Value *NewTask;
3772 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3773 // Check if we have any device clause associated with the directive.
3774 const Expr *Device = nullptr;
3775 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3776 Device = C->getDevice();
3777    // Emit the device ID if any, otherwise use the default value.
3778 llvm::Value *DeviceID;
3779 if (Device)
3780 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3781 CGF.Int64Ty, /*isSigned=*/true);
3782 else
3783 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3784 AllocArgs.push_back(DeviceID);
3785 NewTask = CGF.EmitRuntimeCall(
3786 OMPBuilder.getOrCreateRuntimeFunction(
3787 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3788 AllocArgs);
3789 } else {
3790 NewTask =
3791 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3792 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3793 AllocArgs);
3794 }
3795 // Emit detach clause initialization.
3796 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3797 // task_descriptor);
3798 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3799 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3800 LValue EvtLVal = CGF.EmitLValue(Evt);
3801
3802 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3803 // int gtid, kmp_task_t *task);
3804 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3805 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3806 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3807 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3808 OMPBuilder.getOrCreateRuntimeFunction(
3809 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3810 {Loc, Tid, NewTask});
3811 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3812 Evt->getExprLoc());
3813 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3814 }
3815 // Process affinity clauses.
3816 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3817 // Process list of affinity data.
3818 ASTContext &C = CGM.getContext();
3819 Address AffinitiesArray = Address::invalid();
3820 // Calculate number of elements to form the array of affinity data.
3821 llvm::Value *NumOfElements = nullptr;
3822 unsigned NumAffinities = 0;
3823 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3824 if (const Expr *Modifier = C->getModifier()) {
3825 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3826 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3827 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3828 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3829 NumOfElements =
3830 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3831 }
3832 } else {
3833 NumAffinities += C->varlist_size();
3834 }
3835 }
3836 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3837    // Field ids in the kmp_task_affinity_info record.
3838 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3839
3840 QualType KmpTaskAffinityInfoArrayTy;
3841 if (NumOfElements) {
3842 NumOfElements = CGF.Builder.CreateNUWAdd(
3843 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3844 auto *OVE = new (C) OpaqueValueExpr(
3845 Loc,
3846 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3847 VK_PRValue);
3848 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3849 RValue::get(NumOfElements));
3850 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3851 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3852 /*IndexTypeQuals=*/0);
3853 // Properly emit variable-sized array.
3854 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3855 ImplicitParamKind::Other);
3856 CGF.EmitVarDecl(*PD);
3857 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3858 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3859 /*isSigned=*/false);
3860 } else {
3861 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3862 KmpTaskAffinityInfoTy,
3863 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3864 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3865 AffinitiesArray =
3866 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3867 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3868 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3869 /*isSigned=*/false);
3870 }
3871
3872 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3873 // Fill array by elements without iterators.
3874 unsigned Pos = 0;
3875 bool HasIterator = false;
3876 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3877 if (C->getModifier()) {
3878 HasIterator = true;
3879 continue;
3880 }
3881 for (const Expr *E : C->varlist()) {
3882 llvm::Value *Addr;
3883 llvm::Value *Size;
3884 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3885 LValue Base =
3886 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3887 KmpTaskAffinityInfoTy);
3888 // affs[i].base_addr = &<Affinities[i].second>;
3889 LValue BaseAddrLVal = CGF.EmitLValueForField(
3890 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3891 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3892 BaseAddrLVal);
3893 // affs[i].len = sizeof(<Affinities[i].second>);
3894 LValue LenLVal = CGF.EmitLValueForField(
3895 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3896 CGF.EmitStoreOfScalar(Size, LenLVal);
3897 ++Pos;
3898 }
3899 }
3900 LValue PosLVal;
3901 if (HasIterator) {
3902 PosLVal = CGF.MakeAddrLValue(
3903 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3904 C.getSizeType());
3905 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3906 }
3907 // Process elements with iterators.
3908 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3909 const Expr *Modifier = C->getModifier();
3910 if (!Modifier)
3911 continue;
3912 OMPIteratorGeneratorScope IteratorScope(
3913 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3914 for (const Expr *E : C->varlist()) {
3915 llvm::Value *Addr;
3916 llvm::Value *Size;
3917 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3918 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3919 LValue Base =
3920 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3921 KmpTaskAffinityInfoTy);
3922 // affs[i].base_addr = &<Affinities[i].second>;
3923 LValue BaseAddrLVal = CGF.EmitLValueForField(
3924 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3925 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3926 BaseAddrLVal);
3927 // affs[i].len = sizeof(<Affinities[i].second>);
3928 LValue LenLVal = CGF.EmitLValueForField(
3929 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3930 CGF.EmitStoreOfScalar(Size, LenLVal);
3931 Idx = CGF.Builder.CreateNUWAdd(
3932 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3933 CGF.EmitStoreOfScalar(Idx, PosLVal);
3934 }
3935 }
3936 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3937 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3938 // naffins, kmp_task_affinity_info_t *affin_list);
3939 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3940 llvm::Value *GTid = getThreadID(CGF, Loc);
3941 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3942 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3943 // FIXME: Emit the function and ignore its result for now unless the
3944 // runtime function is properly implemented.
3945 (void)CGF.EmitRuntimeCall(
3946 OMPBuilder.getOrCreateRuntimeFunction(
3947 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3948 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3949 }
3950 llvm::Value *NewTaskNewTaskTTy =
3951 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3952 NewTask, KmpTaskTWithPrivatesPtrTy);
3953 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3954 KmpTaskTWithPrivatesQTy);
3955 LValue TDBase =
3956 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3957 // Fill the data in the resulting kmp_task_t record.
3958 // Copy shareds if there are any.
3959 Address KmpTaskSharedsPtr = Address::invalid();
3960 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
3961 KmpTaskSharedsPtr = Address(
3962 CGF.EmitLoadOfScalar(
3963 CGF.EmitLValueForField(
3964 TDBase,
3965 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3966 Loc),
3967 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3968 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3969 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3970 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3971 }
3972 // Emit initial values for private copies (if any).
3973 TaskResultTy Result;
3974 if (!Privates.empty()) {
3975 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3976 SharedsTy, SharedsPtrTy, Data, Privates,
3977 /*ForDup=*/false);
3978 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3979 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3980 Result.TaskDupFn = emitTaskDupFunction(
3981 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3982 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3983 /*WithLastIter=*/!Data.LastprivateVars.empty());
3984 }
3985 }
3986 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3987 enum { Priority = 0, Destructors = 1 };
3988 // Provide pointer to function with destructors for privates.
3989 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3990 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
3991 assert(KmpCmplrdataUD->isUnion());
3992 if (NeedsCleanup) {
3993 llvm::Value *DestructorFn = emitDestructorsFunction(
3994 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3995 KmpTaskTWithPrivatesQTy);
3996 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3997 LValue DestructorsLV = CGF.EmitLValueForField(
3998 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3999 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4000 DestructorFn, KmpRoutineEntryPtrTy),
4001 DestructorsLV);
4002 }
4003 // Set priority.
4004 if (Data.Priority.getInt()) {
4005 LValue Data2LV = CGF.EmitLValueForField(
4006 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4007 LValue PriorityLV = CGF.EmitLValueForField(
4008 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4009 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4010 }
4011 Result.NewTask = NewTask;
4012 Result.TaskEntry = TaskEntry;
4013 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4014 Result.TDBase = TDBase;
4015 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4016 return Result;
4017}
4018
4019/// Translates internal dependency kind into the runtime kind.
4020static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4021 RTLDependenceKindTy DepKind;
4022 switch (K) {
4023 case OMPC_DEPEND_in:
4024 DepKind = RTLDependenceKindTy::DepIn;
4025 break;
4026 // Out and InOut dependencies must use the same code.
4027 case OMPC_DEPEND_out:
4028 case OMPC_DEPEND_inout:
4029 DepKind = RTLDependenceKindTy::DepInOut;
4030 break;
4031 case OMPC_DEPEND_mutexinoutset:
4032 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4033 break;
4034 case OMPC_DEPEND_inoutset:
4035 DepKind = RTLDependenceKindTy::DepInOutSet;
4036 break;
4037 case OMPC_DEPEND_outallmemory:
4038 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4039 break;
4040 case OMPC_DEPEND_source:
4041 case OMPC_DEPEND_sink:
4042 case OMPC_DEPEND_depobj:
4043 case OMPC_DEPEND_inoutallmemory:
4044 case OMPC_DEPEND_unknown:
4045 llvm_unreachable("Unknown task dependence type");
4046 }
4047 return DepKind;
4048}
4049
4050/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4051static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4052 QualType &FlagsTy) {
4053 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4054 if (KmpDependInfoTy.isNull()) {
4055 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4056 KmpDependInfoRD->startDefinition();
4057 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4058 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4059 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4060 KmpDependInfoRD->completeDefinition();
4061 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4062 }
4063}
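// The record built above mirrors the runtime's kmp_depend_info; roughly
// (a sketch, assuming a target where bool is one byte):
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t len;
//     uint8_t flags; // FlagsTy: unsigned with the bit width of bool
//   };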
4064
4065std::pair<llvm::Value *, LValue>
4066CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4067 SourceLocation Loc) {
4068 ASTContext &C = CGM.getContext();
4069 QualType FlagsTy;
4070 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4071 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4072 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4073 LValue Base = CGF.EmitLoadOfPointerLValue(
4074 DepobjLVal.getAddress().withElementType(
4075 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4076 KmpDependInfoPtrTy->castAs<PointerType>());
4077 Address DepObjAddr = CGF.Builder.CreateGEP(
4078 CGF, Base.getAddress(),
4079 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4080 LValue NumDepsBase = CGF.MakeAddrLValue(
4081 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4082 // NumDeps = deps[i].base_addr;
4083 LValue BaseAddrLVal = CGF.EmitLValueForField(
4084 NumDepsBase,
4085 *std::next(KmpDependInfoRD->field_begin(),
4086 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4087 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4088 return std::make_pair(NumDeps, Base);
4089}
4090
4091static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4092 llvm::PointerUnion<unsigned *, LValue *> Pos,
4093 const OMPTaskDataTy::DependData &Data,
4094 Address DependenciesArray) {
4095 CodeGenModule &CGM = CGF.CGM;
4096 ASTContext &C = CGM.getContext();
4097 QualType FlagsTy;
4098 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4099 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4100 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4101
4102 OMPIteratorGeneratorScope IteratorScope(
4103 CGF, cast_or_null<OMPIteratorExpr>(
4104 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4105 : nullptr));
4106 for (const Expr *E : Data.DepExprs) {
4107 llvm::Value *Addr;
4108 llvm::Value *Size;
4109
4110 // The expression will be a nullptr in the 'omp_all_memory' case.
4111 if (E) {
4112 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4113 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4114 } else {
4115 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4116 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4117 }
4118 LValue Base;
4119 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4120 Base = CGF.MakeAddrLValue(
4121 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4122 } else {
4123 assert(E && "Expected a non-null expression");
4124 LValue &PosLVal = *cast<LValue *>(Pos);
4125 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4126 Base = CGF.MakeAddrLValue(
4127 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4128 }
4129 // deps[i].base_addr = &<Dependencies[i].second>;
4130 LValue BaseAddrLVal = CGF.EmitLValueForField(
4131 Base,
4132 *std::next(KmpDependInfoRD->field_begin(),
4133 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4134 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4135 // deps[i].len = sizeof(<Dependencies[i].second>);
4136 LValue LenLVal = CGF.EmitLValueForField(
4137 Base, *std::next(KmpDependInfoRD->field_begin(),
4138 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4139 CGF.EmitStoreOfScalar(Size, LenLVal);
4140 // deps[i].flags = <Dependencies[i].first>;
4141 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4142 LValue FlagsLVal = CGF.EmitLValueForField(
4143 Base,
4144 *std::next(KmpDependInfoRD->field_begin(),
4145 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4146 CGF.EmitStoreOfScalar(
4147 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4148 FlagsLVal);
4149 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4150 ++(*P);
4151 } else {
4152 LValue &PosLVal = *cast<LValue *>(Pos);
4153 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4154 Idx = CGF.Builder.CreateNUWAdd(Idx,
4155 llvm::ConstantInt::get(Idx->getType(), 1));
4156 CGF.EmitStoreOfScalar(Idx, PosLVal);
4157 }
4158 }
4159}
4160
4161static SmallVector<llvm::Value *, 4>
4162emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4163 const OMPTaskDataTy::DependData &Data) {
4164 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4165 "Expected depobj dependency kind.");
4166 SmallVector<llvm::Value *, 4> Sizes;
4167 SmallVector<LValue, 4> SizeLVals;
4168 ASTContext &C = CGF.getContext();
4169 {
4170 OMPIteratorGeneratorScope IteratorScope(
4171 CGF, cast_or_null<OMPIteratorExpr>(
4172 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4173 : nullptr));
4174 for (const Expr *E : Data.DepExprs) {
4175 llvm::Value *NumDeps;
4176 LValue Base;
4177 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4178 std::tie(NumDeps, Base) =
4179 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4180 LValue NumLVal = CGF.MakeAddrLValue(
4181 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4182 C.getUIntPtrType());
4183 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4184 NumLVal.getAddress());
4185 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4186 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4187 CGF.EmitStoreOfScalar(Add, NumLVal);
4188 SizeLVals.push_back(NumLVal);
4189 }
4190 }
4191 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4192 llvm::Value *Size =
4193 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4194 Sizes.push_back(Size);
4195 }
4196 return Sizes;
4197}
4198
4199static void emitDepobjElements(CodeGenFunction &CGF,
4200 QualType &KmpDependInfoTy,
4201 LValue PosLVal,
4202 const OMPTaskDataTy::DependData &Data,
4203 Address DependenciesArray) {
4204 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4205 "Expected depobj dependency kind.");
4206 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4207 {
4208 OMPIteratorGeneratorScope IteratorScope(
4209 CGF, cast_or_null<OMPIteratorExpr>(
4210 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4211 : nullptr));
4212 for (const Expr *E : Data.DepExprs) {
4213 llvm::Value *NumDeps;
4214 LValue Base;
4215 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4216 std::tie(NumDeps, Base) =
4217 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4218
4219      // memcpy the dependency data.
4220 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4221 ElSize,
4222 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4223 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4224 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4225 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4226
4227 // Increase pos.
4228 // pos += size;
4229 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4230 CGF.EmitStoreOfScalar(Add, PosLVal);
4231 }
4232 }
4233}
4234
4235std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4236 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4237 SourceLocation Loc) {
4238 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4239 return D.DepExprs.empty();
4240 }))
4241 return std::make_pair(nullptr, Address::invalid());
4242 // Process list of dependencies.
4243 ASTContext &C = CGM.getContext();
4244 Address DependenciesArray = Address::invalid();
4245 llvm::Value *NumOfElements = nullptr;
4246 unsigned NumDependencies = std::accumulate(
4247 Dependencies.begin(), Dependencies.end(), 0,
4248 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4249 return D.DepKind == OMPC_DEPEND_depobj
4250 ? V
4251 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4252 });
4253 QualType FlagsTy;
4254 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4255 bool HasDepobjDeps = false;
4256 bool HasRegularWithIterators = false;
4257 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4258 llvm::Value *NumOfRegularWithIterators =
4259 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4260 // Calculate number of depobj dependencies and regular deps with the
4261 // iterators.
4262 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4263 if (D.DepKind == OMPC_DEPEND_depobj) {
4264 SmallVector<llvm::Value *, 4> Sizes =
4265 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4266 for (llvm::Value *Size : Sizes) {
4267 NumOfDepobjElements =
4268 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4269 }
4270 HasDepobjDeps = true;
4271 continue;
4272 }
4273 // Include number of iterations, if any.
4274
4275 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4276 llvm::Value *ClauseIteratorSpace =
4277 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4278 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4279 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4280 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4281 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4282 }
4283 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4284 ClauseIteratorSpace,
4285 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4286 NumOfRegularWithIterators =
4287 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4288 HasRegularWithIterators = true;
4289 continue;
4290 }
4291 }
4292
4293 QualType KmpDependInfoArrayTy;
4294 if (HasDepobjDeps || HasRegularWithIterators) {
4295 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4296 /*isSigned=*/false);
4297 if (HasDepobjDeps) {
4298 NumOfElements =
4299 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4300 }
4301 if (HasRegularWithIterators) {
4302 NumOfElements =
4303 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4304 }
4305 auto *OVE = new (C) OpaqueValueExpr(
4306 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4307 VK_PRValue);
4308 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4309 RValue::get(NumOfElements));
4310 KmpDependInfoArrayTy =
4311 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4312 /*IndexTypeQuals=*/0);
4313 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4314 // Properly emit variable-sized array.
4315 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4316 ImplicitParamKind::Other);
4317 CGF.EmitVarDecl(*PD);
4318 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4319 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4320 /*isSigned=*/false);
4321 } else {
4322 KmpDependInfoArrayTy = C.getConstantArrayType(
4323 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4324 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4325 DependenciesArray =
4326 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4327 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4328 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4329 /*isSigned=*/false);
4330 }
4331 unsigned Pos = 0;
4332 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4333 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4334 continue;
4335 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4336 }
4337 // Copy regular dependencies with iterators.
4338 LValue PosLVal = CGF.MakeAddrLValue(
4339 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4340 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4341 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4342 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4343 continue;
4344 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4345 }
4346 // Copy final depobj arrays without iterators.
4347 if (HasDepobjDeps) {
4348 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4349 if (Dep.DepKind != OMPC_DEPEND_depobj)
4350 continue;
4351 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4352 }
4353 }
4354 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4355 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4356 return std::make_pair(NumOfElements, DependenciesArray);
4357}
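// The dependency array built above is filled in three passes, so its layout
// is: [regular deps without iterators | regular deps with iterators | depobj
// contents], and the returned element count covers all three groups.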
4358
4359Address CGOpenMPRuntime::emitDepobjDependClause(
4360 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4361 SourceLocation Loc) {
4362 if (Dependencies.DepExprs.empty())
4363 return Address::invalid();
4364 // Process list of dependencies.
4365 ASTContext &C = CGM.getContext();
4366 Address DependenciesArray = Address::invalid();
4367 unsigned NumDependencies = Dependencies.DepExprs.size();
4368 QualType FlagsTy;
4369 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4370 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4371
4372 llvm::Value *Size;
4373 // Define type kmp_depend_info[<Dependencies.size()>];
4374 // For depobj reserve one extra element to store the number of elements.
4375 // It is required to handle depobj(x) update(in) construct.
4376 // kmp_depend_info[<Dependencies.size()>] deps;
4377 llvm::Value *NumDepsVal;
4378 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4379 if (const auto *IE =
4380 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4381 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4382 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4383 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4384 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4385 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4386 }
4387 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4388 NumDepsVal);
4389 CharUnits SizeInBytes =
4390 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4391 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4392 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4393 NumDepsVal =
4394 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4395 } else {
4396 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4397 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4398 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4399 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4400 Size = CGM.getSize(Sz.alignTo(Align));
4401 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4402 }
4403 // Need to allocate on the dynamic memory.
4404 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4405 // Use default allocator.
4406 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4407 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4408
4409 llvm::Value *Addr =
4410 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4411 CGM.getModule(), OMPRTL___kmpc_alloc),
4412 Args, ".dep.arr.addr");
4413 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4414 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4415 Addr, CGF.Builder.getPtrTy(0));
4416 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4417 // Write number of elements in the first element of array for depobj.
4418 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4419 // deps[i].base_addr = NumDependencies;
4420 LValue BaseAddrLVal = CGF.EmitLValueForField(
4421 Base,
4422 *std::next(KmpDependInfoRD->field_begin(),
4423 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4424 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4425 llvm::PointerUnion<unsigned *, LValue *> Pos;
4426 unsigned Idx = 1;
4427 LValue PosLVal;
4428 if (Dependencies.IteratorExpr) {
4429 PosLVal = CGF.MakeAddrLValue(
4430 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4431 C.getSizeType());
4432 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4433 /*IsInit=*/true);
4434 Pos = &PosLVal;
4435 } else {
4436 Pos = &Idx;
4437 }
4438 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4439 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4440 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4441 CGF.Int8Ty);
4442 return DependenciesArray;
4443}
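// Note the layout produced above: element 0 of the allocated kmp_depend_info
// array caches the dependency count in its base_addr field (read back by
// getDepobjElements through the GEP at index -1), and the returned address
// points at element 1, where the actual dependencies begin.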
4444
4445void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4446 SourceLocation Loc) {
4447 ASTContext &C = CGM.getContext();
4448 QualType FlagsTy;
4449 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4450 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4451 C.VoidPtrTy.castAs<PointerType>());
4452 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4453 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4454 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4455 CGF.ConvertTypeForMem(KmpDependInfoTy));
4456 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4457 Addr.getElementType(), Addr.emitRawPointer(CGF),
4458 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4459 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4460 CGF.VoidPtrTy);
4461 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4462 // Use default allocator.
4463 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4464 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4465
4466 // __kmpc_free(gtid, addr, nullptr);
4467 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4468 CGM.getModule(), OMPRTL___kmpc_free),
4469 Args);
4470}
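// Illustrative sketch (assumed user code): the matching teardown is the
// 'destroy' clause, e.g.
//
//   #pragma omp depobj(obj) destroy
//
// The GEP by -1 above steps from the stored payload pointer back to record 0,
// the counter record, so that __kmpc_free releases the original allocation.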
4471
4472 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4473 OpenMPDependClauseKind NewDepKind,
4474 SourceLocation Loc) {
4475 ASTContext &C = CGM.getContext();
4476 QualType FlagsTy;
4477 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4478 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4479 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4480 llvm::Value *NumDeps;
4481 LValue Base;
4482 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4483
4484 Address Begin = Base.getAddress();
4485 // Cast from pointer to array type to pointer to single element.
4486 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4487 Begin.emitRawPointer(CGF), NumDeps);
4488 // The basic structure here is a while-do loop.
4489 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4490 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4491 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4492 CGF.EmitBlock(BodyBB);
4493 llvm::PHINode *ElementPHI =
4494 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4495 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4496 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4497 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4498 Base.getTBAAInfo());
4499 // deps[i].flags = NewDepKind;
4500 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4501 LValue FlagsLVal = CGF.EmitLValueForField(
4502 Base, *std::next(KmpDependInfoRD->field_begin(),
4503 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4504 CGF.EmitStoreOfScalar(
4505 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4506 FlagsLVal);
4507
4508 // Shift the address forward by one element.
4509 llvm::Value *ElementNext =
4510 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4511 .emitRawPointer(CGF);
4512 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4513 llvm::Value *IsEmpty =
4514 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4515 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4516 // Done.
4517 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4518}
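// Illustrative sketch (assumed user code): this loop implements the 'update'
// clause of 'depobj', e.g.
//
//   #pragma omp depobj(obj) update(in)
//
// which rewrites only the flags field of every kmp_depend_info record held
// by 'obj' with the translated new dependency kind.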
4519
4520 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4521 const OMPExecutableDirective &D,
4522 llvm::Function *TaskFunction,
4523 QualType SharedsTy, Address Shareds,
4524 const Expr *IfCond,
4525 const OMPTaskDataTy &Data) {
4526 if (!CGF.HaveInsertPoint())
4527 return;
4528
4529 TaskResultTy Result =
4530 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4531 llvm::Value *NewTask = Result.NewTask;
4532 llvm::Function *TaskEntry = Result.TaskEntry;
4533 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4534 LValue TDBase = Result.TDBase;
4535 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4536 // Process list of dependences.
4537 Address DependenciesArray = Address::invalid();
4538 llvm::Value *NumOfElements;
4539 std::tie(NumOfElements, DependenciesArray) =
4540 emitDependClause(CGF, Data.Dependences, Loc);
4541
4542 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4543 // libcall.
4544 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4545 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4546 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4547 // list is not empty
4548 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4549 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4550 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4551 llvm::Value *DepTaskArgs[7];
4552 if (!Data.Dependences.empty()) {
4553 DepTaskArgs[0] = UpLoc;
4554 DepTaskArgs[1] = ThreadID;
4555 DepTaskArgs[2] = NewTask;
4556 DepTaskArgs[3] = NumOfElements;
4557 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4558 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4559 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4560 }
4561 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4562 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4563 if (!Data.Tied) {
4564 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4565 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4566 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4567 }
4568 if (!Data.Dependences.empty()) {
4569 CGF.EmitRuntimeCall(
4570 OMPBuilder.getOrCreateRuntimeFunction(
4571 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4572 DepTaskArgs);
4573 } else {
4574 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4575 CGM.getModule(), OMPRTL___kmpc_omp_task),
4576 TaskArgs);
4577 }
4578 // Check if the parent region is untied and build a return for the untied task.
4579 if (auto *Region =
4580 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4581 Region->emitUntiedSwitch(CGF);
4582 };
4583
4584 llvm::Value *DepWaitTaskArgs[7];
4585 if (!Data.Dependences.empty()) {
4586 DepWaitTaskArgs[0] = UpLoc;
4587 DepWaitTaskArgs[1] = ThreadID;
4588 DepWaitTaskArgs[2] = NumOfElements;
4589 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4590 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4591 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4592 DepWaitTaskArgs[6] =
4593 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4594 }
4595 auto &M = CGM.getModule();
4596 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4597 TaskEntry, &Data, &DepWaitTaskArgs,
4598 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4599 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4600 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4601 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4602 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4603 // is specified.
4604 if (!Data.Dependences.empty())
4605 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4606 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4607 DepWaitTaskArgs);
4608 // Call proxy_task_entry(gtid, new_task);
4609 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4610 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4611 Action.Enter(CGF);
4612 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4613 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4614 OutlinedFnArgs);
4615 };
4616
4617 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4618 // kmp_task_t *new_task);
4619 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4620 // kmp_task_t *new_task);
4621 RegionCodeGenTy RCG(CodeGen);
4622 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4623 M, OMPRTL___kmpc_omp_task_begin_if0),
4624 TaskArgs,
4625 OMPBuilder.getOrCreateRuntimeFunction(
4626 M, OMPRTL___kmpc_omp_task_complete_if0),
4627 TaskArgs);
4628 RCG.setAction(Action);
4629 RCG(CGF);
4630 };
4631
4632 if (IfCond) {
4633 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4634 } else {
4635 RegionCodeGenTy ThenRCG(ThenCodeGen);
4636 ThenRCG(CGF);
4637 }
4638}
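// Illustrative sketch (assumed user code) exercising both branches above:
//
//   #pragma omp task if(cond) depend(in: x)
//   { work(); }
//
// When 'cond' is true, ThenCodeGen enqueues the task with
// __kmpc_omp_task_with_deps; otherwise ElseCodeGen waits on the dependences
// via __kmpc_omp_taskwait_deps_51 and runs the task body inline between
// __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0.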
4639
4640 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4641 const OMPLoopDirective &D,
4642 llvm::Function *TaskFunction,
4643 QualType SharedsTy, Address Shareds,
4644 const Expr *IfCond,
4645 const OMPTaskDataTy &Data) {
4646 if (!CGF.HaveInsertPoint())
4647 return;
4648 TaskResultTy Result =
4649 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4650 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4651 // libcall.
4652 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4653 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4654 // sched, kmp_uint64 grainsize, void *task_dup);
4655 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4656 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4657 llvm::Value *IfVal;
4658 if (IfCond) {
4659 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4660 /*isSigned=*/true);
4661 } else {
4662 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4663 }
4664
4665 LValue LBLVal = CGF.EmitLValueForField(
4666 Result.TDBase,
4667 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4668 const auto *LBVar =
4669 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4670 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4671 /*IsInitializer=*/true);
4672 LValue UBLVal = CGF.EmitLValueForField(
4673 Result.TDBase,
4674 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4675 const auto *UBVar =
4676 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4677 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4678 /*IsInitializer=*/true);
4679 LValue StLVal = CGF.EmitLValueForField(
4680 Result.TDBase,
4681 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4682 const auto *StVar =
4683 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4684 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4685 /*IsInitializer=*/true);
4686 // Store reductions address.
4687 LValue RedLVal = CGF.EmitLValueForField(
4688 Result.TDBase,
4689 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4690 if (Data.Reductions) {
4691 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4692 } else {
4693 CGF.EmitNullInitialization(RedLVal.getAddress(),
4694 CGF.getContext().VoidPtrTy);
4695 }
4696 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4697 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4698 UpLoc,
4699 ThreadID,
4700 Result.NewTask,
4701 IfVal,
4702 LBLVal.getPointer(CGF),
4703 UBLVal.getPointer(CGF),
4704 CGF.EmitLoadOfScalar(StLVal, Loc),
4705 llvm::ConstantInt::getSigned(
4706 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4707 llvm::ConstantInt::getSigned(
4708 CGF.IntTy, Data.Schedule.getPointer()
4709 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4710 : NoSchedule),
4711 Data.Schedule.getPointer()
4712 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4713 /*isSigned=*/false)
4714 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4715 if (Data.HasModifier)
4716 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4717
4718 TaskArgs.push_back(Result.TaskDupFn
4719 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4720 Result.TaskDupFn, CGF.VoidPtrTy)
4721 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4722 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4723 CGM.getModule(), Data.HasModifier
4724 ? OMPRTL___kmpc_taskloop_5
4725 : OMPRTL___kmpc_taskloop),
4726 TaskArgs);
4727}
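// Illustrative sketch (assumed user code) of how the sched/grainsize
// arguments above are selected:
//
//   #pragma omp taskloop grainsize(4)
//   for (int i = 0; i < n; ++i) body(i);
//
// passes sched = Grainsize (1) with 4 as the kmp_uint64 value; num_tasks(8)
// would pass sched = NumTasks (2) with 8; with neither clause, sched is
// NoSchedule (0) and the value is 0.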
4728
4729/// Emit reduction operation for each element of array (required for
4730/// array sections) LHS op = RHS.
4731/// \param Type Type of array.
4732/// \param LHSVar Variable on the left side of the reduction operation
4733/// (references element of array in original variable).
4734/// \param RHSVar Variable on the right side of the reduction operation
4735/// (references element of array in original variable).
4736/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4737/// RHSVar.
4738 static void EmitOMPAggregateReduction(
4739 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4740 const VarDecl *RHSVar,
4741 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4742 const Expr *, const Expr *)> &RedOpGen,
4743 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4744 const Expr *UpExpr = nullptr) {
4745 // Perform element-by-element initialization.
4746 QualType ElementTy;
4747 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4748 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4749
4750 // Drill down to the base element type on both arrays.
4751 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4752 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4753
4754 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4755 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4756 // Cast from pointer to array type to pointer to single element.
4757 llvm::Value *LHSEnd =
4758 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4759 // The basic structure here is a while-do loop.
4760 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4761 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4762 llvm::Value *IsEmpty =
4763 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4764 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4765
4766 // Enter the loop body, making that address the current address.
4767 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4768 CGF.EmitBlock(BodyBB);
4769
4770 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4771
4772 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4773 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4774 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4775 Address RHSElementCurrent(
4776 RHSElementPHI, RHSAddr.getElementType(),
4777 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4778
4779 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4780 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4781 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4782 Address LHSElementCurrent(
4783 LHSElementPHI, LHSAddr.getElementType(),
4784 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4785
4786 // Emit copy.
4787 CodeGenFunction::OMPPrivateScope Scope(CGF);
4788 Scope.addPrivate(LHSVar, LHSElementCurrent);
4789 Scope.addPrivate(RHSVar, RHSElementCurrent);
4790 Scope.Privatize();
4791 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4792 Scope.ForceCleanup();
4793
4794 // Shift the address forward by one element.
4795 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4796 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4797 "omp.arraycpy.dest.element");
4798 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4799 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4800 "omp.arraycpy.src.element");
4801 // Check whether we've reached the end.
4802 llvm::Value *Done =
4803 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4804 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4805 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4806 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4807
4808 // Done.
4809 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4810}
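// Illustrative sketch (assumed user code): this element-wise loop is what
// makes reductions over array sections work, e.g.
//
//   int a[100];
//   #pragma omp parallel for reduction(+: a[0:100])
//   for (int i = 0; i < n; ++i) a[idx(i)] += f(i);  // idx, f are assumed
//
// Each thread's private copy of the section is combined into the original
// array one element at a time through the RedOpGen callback.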
4811
4812 /// Emit reduction combiner. If the combiner is a simple expression, emit it
4813 /// as is; otherwise treat it as the combiner of a UDR decl and emit it as a
4814 /// call to the UDR combiner function.
4815 static void emitReductionCombiner(CodeGenFunction &CGF,
4816 const Expr *ReductionOp) {
4817 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4818 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4819 if (const auto *DRE =
4820 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4821 if (const auto *DRD =
4822 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4823 std::pair<llvm::Function *, llvm::Function *> Reduction =
4824 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4825 RValue Func = RValue::get(Reduction.first);
4826 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4827 CGF.EmitIgnoredExpr(ReductionOp);
4828 return;
4829 }
4830 CGF.EmitIgnoredExpr(ReductionOp);
4831}
4832
4833 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4834 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4835 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4836 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4837 ASTContext &C = CGM.getContext();
4838
4839 // void reduction_func(void *LHSArg, void *RHSArg);
4840 FunctionArgList Args;
4841 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4842 ImplicitParamKind::Other);
4843 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4844 ImplicitParamKind::Other);
4845 Args.push_back(&LHSArg);
4846 Args.push_back(&RHSArg);
4847 const auto &CGFI =
4848 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4849 std::string Name = getReductionFuncName(ReducerName);
4850 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4851 llvm::GlobalValue::InternalLinkage, Name,
4852 &CGM.getModule());
4853 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4854 Fn->setDoesNotRecurse();
4855 CodeGenFunction CGF(CGM);
4856 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4857
4858 // Dst = (void*[n])(LHSArg);
4859 // Src = (void*[n])(RHSArg);
4860 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4861 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4862 CGF.Builder.getPtrTy(0)),
4863 ArgsElemType, CGF.getPointerAlign());
4864 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4865 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4866 CGF.Builder.getPtrTy(0)),
4867 ArgsElemType, CGF.getPointerAlign());
4868
4869 // ...
4870 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4871 // ...
4872 CodeGenFunction::OMPPrivateScope Scope(CGF);
4873 const auto *IPriv = Privates.begin();
4874 unsigned Idx = 0;
4875 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4876 const auto *RHSVar =
4877 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4878 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4879 const auto *LHSVar =
4880 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4881 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4882 QualType PrivTy = (*IPriv)->getType();
4883 if (PrivTy->isVariablyModifiedType()) {
4884 // Get array size and emit VLA type.
4885 ++Idx;
4886 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4887 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4888 const VariableArrayType *VLA =
4889 CGF.getContext().getAsVariableArrayType(PrivTy);
4890 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4891 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4892 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4893 CGF.EmitVariablyModifiedType(PrivTy);
4894 }
4895 }
4896 Scope.Privatize();
4897 IPriv = Privates.begin();
4898 const auto *ILHS = LHSExprs.begin();
4899 const auto *IRHS = RHSExprs.begin();
4900 for (const Expr *E : ReductionOps) {
4901 if ((*IPriv)->getType()->isArrayType()) {
4902 // Emit reduction for array section.
4903 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4904 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4905 EmitOMPAggregateReduction(
4906 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4907 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4908 emitReductionCombiner(CGF, E);
4909 });
4910 } else {
4911 // Emit reduction for array subscript or single variable.
4912 emitReductionCombiner(CGF, E);
4913 }
4914 ++IPriv;
4915 ++ILHS;
4916 ++IRHS;
4917 }
4918 Scope.ForceCleanup();
4919 CGF.FinishFunction();
4920 return Fn;
4921}
4922
4923 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4924 const Expr *ReductionOp,
4925 const Expr *PrivateRef,
4926 const DeclRefExpr *LHS,
4927 const DeclRefExpr *RHS) {
4928 if (PrivateRef->getType()->isArrayType()) {
4929 // Emit reduction for array section.
4930 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4931 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4932 EmitOMPAggregateReduction(
4933 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4934 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4935 emitReductionCombiner(CGF, ReductionOp);
4936 });
4937 } else {
4938 // Emit reduction for array subscript or single variable.
4939 emitReductionCombiner(CGF, ReductionOp);
4940 }
4941}
4942
4943static std::string generateUniqueName(CodeGenModule &CGM,
4944 llvm::StringRef Prefix, const Expr *Ref);
4945
4946 void CGOpenMPRuntime::emitPrivateReduction(
4947 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4948 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4949
4950 // Create a shared global variable (__shared_reduction_var) to accumulate the
4951 // final result.
4952 //
4953 // Call __kmpc_barrier to synchronize threads before initialization.
4954 //
4955 // The master thread (thread_id == 0) initializes __shared_reduction_var
4956 // with the identity value or initializer.
4957 //
4958 // Call __kmpc_barrier to synchronize before combining.
4959 // For each i:
4960 // - Thread enters critical section.
4961 // - Reads its private value from LHSExprs[i].
4962 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4963 // Privates[i]).
4964 // - Exits critical section.
4965 //
4966 // Call __kmpc_barrier after combining.
4967 //
4968 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4969 //
4970 // Final __kmpc_barrier to synchronize after broadcasting
4971 QualType PrivateType = Privates->getType();
4972 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4973
4974 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
4975 std::string ReductionVarNameStr;
4976 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
4977 ReductionVarNameStr =
4978 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4979 else
4980 ReductionVarNameStr = "unnamed_priv_var";
4981
4982 // Create an internal shared variable
4983 std::string SharedName =
4984 CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
4985 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4986 LLVMType, ".omp.reduction." + SharedName);
4987
4988 SharedVar->setAlignment(
4989 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4990
4991 Address SharedResult =
4992 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
4993
4994 llvm::Value *ThreadId = getThreadID(CGF, Loc);
4995 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4996 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
4997
4998 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
4999 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
5000
5001 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
5002 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
5003 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
5004
5005 CGF.EmitBlock(InitBB);
5006
5007 auto EmitSharedInit = [&]() {
5008 if (UDR) { // Check if it's a User-Defined Reduction
5009 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5010 std::pair<llvm::Function *, llvm::Function *> FnPair =
5011 getUserDefinedReduction(UDR);
5012 llvm::Function *InitializerFn = FnPair.second;
5013 if (InitializerFn) {
5014 if (const auto *CE =
5015 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
5016 const auto *OutDRE = cast<DeclRefExpr>(
5017 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5018 ->getSubExpr());
5019 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5020
5021 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5022 LocalScope.addPrivate(OutVD, SharedResult);
5023
5024 (void)LocalScope.Privatize();
5025 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5026 CE->getCallee()->IgnoreParenImpCasts())) {
5027 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5028 CGF, OVE, RValue::get(InitializerFn));
5029 CGF.EmitIgnoredExpr(CE);
5030 } else {
5031 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5032 PrivateType.getQualifiers(),
5033 /*IsInitializer=*/true);
5034 }
5035 } else {
5036 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5037 PrivateType.getQualifiers(),
5038 /*IsInitializer=*/true);
5039 }
5040 } else {
5041 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5042 PrivateType.getQualifiers(),
5043 /*IsInitializer=*/true);
5044 }
5045 } else {
5046 // EmitNullInitialization handles default construction for C++ classes
5047 // and zeroing for scalars, which is a reasonable default.
5048 CGF.EmitNullInitialization(SharedResult, PrivateType);
5049 }
5050 return; // UDR initialization handled
5051 }
5052 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5053 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5054 if (const Expr *InitExpr = VD->getInit()) {
5055 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5056 PrivateType.getQualifiers(), true);
5057 return;
5058 }
5059 }
5060 }
5061 CGF.EmitNullInitialization(SharedResult, PrivateType);
5062 };
5063 EmitSharedInit();
5064 CGF.Builder.CreateBr(InitEndBB);
5065 CGF.EmitBlock(InitEndBB);
5066
5067 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5068 CGM.getModule(), OMPRTL___kmpc_barrier),
5069 BarrierArgs);
5070
5071 const Expr *ReductionOp = ReductionOps;
5072 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5073 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5074 LValue LHSLV = CGF.EmitLValue(Privates);
5075
5076 auto EmitCriticalReduction = [&](auto ReductionGen) {
5077 std::string CriticalName = getName({"reduction_critical"});
5078 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5079 };
5080
5081 if (CurrentUDR) {
5082 // Handle user-defined reduction.
5083 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5084 Action.Enter(CGF);
5085 std::pair<llvm::Function *, llvm::Function *> FnPair =
5086 getUserDefinedReduction(CurrentUDR);
5087 if (FnPair.first) {
5088 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5089 const auto *OutDRE = cast<DeclRefExpr>(
5090 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5091 ->getSubExpr());
5092 const auto *InDRE = cast<DeclRefExpr>(
5093 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5094 ->getSubExpr());
5095 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5096 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5097 SharedLV.getAddress());
5098 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5099 LHSLV.getAddress());
5100 (void)LocalScope.Privatize();
5101 emitReductionCombiner(CGF, ReductionOp);
5102 }
5103 }
5104 };
5105 EmitCriticalReduction(ReductionGen);
5106 } else {
5107 // Handle built-in reduction operations.
5108#ifndef NDEBUG
5109 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5110 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5111 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5112
5113 const Expr *AssignRHS = nullptr;
5114 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5115 if (BinOp->getOpcode() == BO_Assign)
5116 AssignRHS = BinOp->getRHS();
5117 } else if (const auto *OpCall =
5118 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5119 if (OpCall->getOperator() == OO_Equal)
5120 AssignRHS = OpCall->getArg(1);
5121 }
5122
5123 assert(AssignRHS &&
5124 "Private Variable Reduction : Invalid ReductionOp expression");
5125#endif
5126
5127 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5128 Action.Enter(CGF);
5129 const auto *OmpOutDRE =
5130 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5131 const auto *OmpInDRE =
5132 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5133 assert(
5134 OmpOutDRE && OmpInDRE &&
5135 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5136 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5137 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5138 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5139 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5140 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5141 (void)LocalScope.Privatize();
5142 // Emit the actual reduction operation
5143 CGF.EmitIgnoredExpr(ReductionOp);
5144 };
5145 EmitCriticalReduction(ReductionGen);
5146 }
5147
5148 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5149 CGM.getModule(), OMPRTL___kmpc_barrier),
5150 BarrierArgs);
5151
5152 // Broadcast final result
5153 bool IsAggregate = PrivateType->isAggregateType();
5154 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5155 llvm::Value *FinalResultVal = nullptr;
5156 Address FinalResultAddr = Address::invalid();
5157
5158 if (IsAggregate)
5159 FinalResultAddr = SharedResult;
5160 else
5161 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5162
5163 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5164 if (IsAggregate) {
5165 CGF.EmitAggregateCopy(TargetLHSLV,
5166 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5167 PrivateType, AggValueSlot::DoesNotOverlap, false);
5168 } else {
5169 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5170 }
5171 // Final synchronization barrier
5172 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5173 CGM.getModule(), OMPRTL___kmpc_barrier),
5174 BarrierArgs);
5175
5176 // Combiner with original list item
5177 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5178 PrePostActionTy &Action) {
5179 Action.Enter(CGF);
5180 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5181 cast<DeclRefExpr>(LHSExprs),
5182 cast<DeclRefExpr>(RHSExprs));
5183 };
5184 EmitCriticalReduction(OriginalListCombiner);
5185}
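// The barrier protocol implemented above, restated as a sketch (not upstream
// text):
//
//   if (gtid == 0) shared = <init value>;       // master initializes
//   __kmpc_barrier();
//   critical { shared = RedOp(shared, priv); }  // serialized combine
//   __kmpc_barrier();
//   local = shared;                             // broadcast back
//   __kmpc_barrier();
//   critical { orig = RedOp(orig, local); }     // fold into original item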
5186
5187 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5188 ArrayRef<const Expr *> OrgPrivates,
5189 ArrayRef<const Expr *> OrgLHSExprs,
5190 ArrayRef<const Expr *> OrgRHSExprs,
5191 ArrayRef<const Expr *> OrgReductionOps,
5192 ReductionOptionsTy Options) {
5193 if (!CGF.HaveInsertPoint())
5194 return;
5195
5196 bool WithNowait = Options.WithNowait;
5197 bool SimpleReduction = Options.SimpleReduction;
5198
5199 // The following code is emitted for the reduction:
5200 //
5201 // static kmp_critical_name lock = { 0 };
5202 //
5203 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5204 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5205 // ...
5206 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5207 // *(Type<n>-1*)rhs[<n>-1]);
5208 // }
5209 //
5210 // ...
5211 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5212 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5213 // RedList, reduce_func, &<lock>)) {
5214 // case 1:
5215 // ...
5216 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5217 // ...
5218 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5219 // break;
5220 // case 2:
5221 // ...
5222 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5223 // ...
5224 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5225 // break;
5226 // default:;
5227 // }
5228 //
5229 // If SimpleReduction is true, only the following code is generated:
5230 // ...
5231 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5232 // ...
5233
5234 ASTContext &C = CGM.getContext();
5235
5236 if (SimpleReduction) {
5237 CodeGenFunction::RunCleanupsScope Scope(CGF);
5238 const auto *IPriv = OrgPrivates.begin();
5239 const auto *ILHS = OrgLHSExprs.begin();
5240 const auto *IRHS = OrgRHSExprs.begin();
5241 for (const Expr *E : OrgReductionOps) {
5242 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5243 cast<DeclRefExpr>(*IRHS));
5244 ++IPriv;
5245 ++ILHS;
5246 ++IRHS;
5247 }
5248 return;
5249 }
5250
5251 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5252 // Only keep entries where the corresponding variable is not private.
5253 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5254 FilteredRHSExprs, FilteredReductionOps;
5255 for (unsigned I : llvm::seq<unsigned>(
5256 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5257 if (!Options.IsPrivateVarReduction[I]) {
5258 FilteredPrivates.emplace_back(OrgPrivates[I]);
5259 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5260 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5261 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5262 }
5263 }
5264 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5265 // processing.
5266 ArrayRef<const Expr *> Privates = FilteredPrivates;
5267 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5268 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5269 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5270
5271 // 1. Build a list of reduction variables.
5272 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5273 auto Size = RHSExprs.size();
5274 for (const Expr *E : Privates) {
5275 if (E->getType()->isVariablyModifiedType())
5276 // Reserve place for array size.
5277 ++Size;
5278 }
5279 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5280 QualType ReductionArrayTy = C.getConstantArrayType(
5281 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5282 /*IndexTypeQuals=*/0);
5283 RawAddress ReductionList =
5284 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5285 const auto *IPriv = Privates.begin();
5286 unsigned Idx = 0;
5287 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5288 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5289 CGF.Builder.CreateStore(
5290 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5291 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5292 Elem);
5293 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5294 // Store array size.
5295 ++Idx;
5296 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5297 llvm::Value *Size = CGF.Builder.CreateIntCast(
5298 CGF.getVLASize(
5299 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5300 .NumElts,
5301 CGF.SizeTy, /*isSigned=*/false);
5302 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5303 Elem);
5304 }
5305 }
5306
5307 // 2. Emit reduce_func().
5308 llvm::Function *ReductionFn = emitReductionFunction(
5309 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5310 Privates, LHSExprs, RHSExprs, ReductionOps);
5311
5312 // 3. Create static kmp_critical_name lock = { 0 };
5313 std::string Name = getName({"reduction"});
5314 llvm::Value *Lock = getCriticalRegionLock(Name);
5315
5316 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5317 // RedList, reduce_func, &<lock>);
5318 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5319 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5320 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5321 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5322 ReductionList.getPointer(), CGF.VoidPtrTy);
5323 llvm::Value *Args[] = {
5324 IdentTLoc, // ident_t *<loc>
5325 ThreadId, // i32 <gtid>
5326 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5327 ReductionArrayTySize, // size_type sizeof(RedList)
5328 RL, // void *RedList
5329 ReductionFn, // void (*) (void *, void *) <reduce_func>
5330 Lock // kmp_critical_name *&<lock>
5331 };
5332 llvm::Value *Res = CGF.EmitRuntimeCall(
5333 OMPBuilder.getOrCreateRuntimeFunction(
5334 CGM.getModule(),
5335 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5336 Args);
5337
5338 // 5. Build switch(res)
5339 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5340 llvm::SwitchInst *SwInst =
5341 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5342
5343 // 6. Build case 1:
5344 // ...
5345 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5346 // ...
5347 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5348 // break;
5349 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5350 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5351 CGF.EmitBlock(Case1BB);
5352
5353 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5354 llvm::Value *EndArgs[] = {
5355 IdentTLoc, // ident_t *<loc>
5356 ThreadId, // i32 <gtid>
5357 Lock // kmp_critical_name *&<lock>
5358 };
5359 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5360 CodeGenFunction &CGF, PrePostActionTy &Action) {
5361 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5362 const auto *IPriv = Privates.begin();
5363 const auto *ILHS = LHSExprs.begin();
5364 const auto *IRHS = RHSExprs.begin();
5365 for (const Expr *E : ReductionOps) {
5366 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5367 cast<DeclRefExpr>(*IRHS));
5368 ++IPriv;
5369 ++ILHS;
5370 ++IRHS;
5371 }
5372 };
5373 RegionCodeGenTy RCG(CodeGen);
5374 CommonActionTy Action(
5375 nullptr, {},
5376 OMPBuilder.getOrCreateRuntimeFunction(
5377 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5378 : OMPRTL___kmpc_end_reduce),
5379 EndArgs);
5380 RCG.setAction(Action);
5381 RCG(CGF);
5382
5383 CGF.EmitBranch(DefaultBB);
5384
5385 // 7. Build case 2:
5386 // ...
5387 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5388 // ...
5389 // break;
5390 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5391 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5392 CGF.EmitBlock(Case2BB);
5393
5394 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5395 CodeGenFunction &CGF, PrePostActionTy &Action) {
5396 const auto *ILHS = LHSExprs.begin();
5397 const auto *IRHS = RHSExprs.begin();
5398 const auto *IPriv = Privates.begin();
5399 for (const Expr *E : ReductionOps) {
5400 const Expr *XExpr = nullptr;
5401 const Expr *EExpr = nullptr;
5402 const Expr *UpExpr = nullptr;
5403 BinaryOperatorKind BO = BO_Comma;
5404 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5405 if (BO->getOpcode() == BO_Assign) {
5406 XExpr = BO->getLHS();
5407 UpExpr = BO->getRHS();
5408 }
5409 }
5410 // Try to emit update expression as a simple atomic.
5411 const Expr *RHSExpr = UpExpr;
5412 if (RHSExpr) {
5413 // Analyze RHS part of the whole expression.
5414 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5415 RHSExpr->IgnoreParenImpCasts())) {
5416 // If this is a conditional operator, analyze its condition for
5417 // min/max reduction operator.
5418 RHSExpr = ACO->getCond();
5419 }
5420 if (const auto *BORHS =
5421 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5422 EExpr = BORHS->getRHS();
5423 BO = BORHS->getOpcode();
5424 }
5425 }
5426 if (XExpr) {
5427 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5428 auto &&AtomicRedGen = [BO, VD,
5429 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5430 const Expr *EExpr, const Expr *UpExpr) {
5431 LValue X = CGF.EmitLValue(XExpr);
5432 RValue E;
5433 if (EExpr)
5434 E = CGF.EmitAnyExpr(EExpr);
5435 CGF.EmitOMPAtomicSimpleUpdateExpr(
5436 X, E, BO, /*IsXLHSInRHSPart=*/true,
5437 llvm::AtomicOrdering::Monotonic, Loc,
5438 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5439 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5440 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5441 CGF.emitOMPSimpleStore(
5442 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5443 VD->getType().getNonReferenceType(), Loc);
5444 PrivateScope.addPrivate(VD, LHSTemp);
5445 (void)PrivateScope.Privatize();
5446 return CGF.EmitAnyExpr(UpExpr);
5447 });
5448 };
5449 if ((*IPriv)->getType()->isArrayType()) {
5450 // Emit atomic reduction for array section.
5451 const auto *RHSVar =
5452 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5453 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5454 AtomicRedGen, XExpr, EExpr, UpExpr);
5455 } else {
5456 // Emit atomic reduction for array subscript or single variable.
5457 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5458 }
5459 } else {
5460 // Emit as a critical region.
5461 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5462 const Expr *, const Expr *) {
5463 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5464 std::string Name = RT.getName({"atomic_reduction"});
5465 RT.emitCriticalRegion(
5466 CGF, Name,
5467 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5468 Action.Enter(CGF);
5469 emitReductionCombiner(CGF, E);
5470 },
5471 Loc);
5472 };
5473 if ((*IPriv)->getType()->isArrayType()) {
5474 const auto *LHSVar =
5475 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5476 const auto *RHSVar =
5477 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5478 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5479 CritRedGen);
5480 } else {
5481 CritRedGen(CGF, nullptr, nullptr, nullptr);
5482 }
5483 }
5484 ++ILHS;
5485 ++IRHS;
5486 ++IPriv;
5487 }
5488 };
5489 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5490 if (!WithNowait) {
5491 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5492 llvm::Value *EndArgs[] = {
5493 IdentTLoc, // ident_t *<loc>
5494 ThreadId, // i32 <gtid>
5495 Lock // kmp_critical_name *&<lock>
5496 };
5497 CommonActionTy Action(nullptr, {},
5498 OMPBuilder.getOrCreateRuntimeFunction(
5499 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5500 EndArgs);
5501 AtomicRCG.setAction(Action);
5502 AtomicRCG(CGF);
5503 } else {
5504 AtomicRCG(CGF);
5505 }
5506
5507 CGF.EmitBranch(DefaultBB);
5508 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5509 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5510 "PrivateVarReduction: Privates size mismatch");
5511 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5512 "PrivateVarReduction: ReductionOps size mismatch");
5513 for (unsigned I : llvm::seq<unsigned>(
5514 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5515 if (Options.IsPrivateVarReduction[I])
5516 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5517 OrgRHSExprs[I], OrgReductionOps[I]);
5518 }
5519}
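// Illustrative sketch (assumed user code) reaching the switch above:
//
//   #pragma omp parallel for reduction(+: sum)
//   for (int i = 0; i < n; ++i) sum += f(i);
//
// A __kmpc_reduce result of 1 selects case 1 (combine, then
// __kmpc_end_reduce{_nowait}), 2 selects the per-item atomic path, and 0
// means this thread has no further reduction work to do.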
5520
5521/// Generates unique name for artificial threadprivate variables.
5522/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5523static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5524 const Expr *Ref) {
5525 SmallString<256> Buffer;
5526 llvm::raw_svector_ostream Out(Buffer);
5527 const clang::DeclRefExpr *DE;
5528 const VarDecl *D = ::getBaseDecl(Ref, DE);
5529 if (!D)
5530 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5531 D = D->getCanonicalDecl();
5532 std::string Name = CGM.getOpenMPRuntime().getName(
5533 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5534 Out << Prefix << Name << "_"
5535 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5536 return std::string(Out.str());
5537}
5538
5539/// Emits reduction initializer function:
5540/// \code
5541/// void @.red_init(void* %arg, void* %orig) {
5542/// %0 = bitcast void* %arg to <type>*
5543/// store <type> <init>, <type>* %0
5544/// ret void
5545/// }
5546/// \endcode
5547static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5548 SourceLocation Loc,
5549 ReductionCodeGen &RCG, unsigned N) {
5550 ASTContext &C = CGM.getContext();
5551 QualType VoidPtrTy = C.VoidPtrTy;
5552 VoidPtrTy.addRestrict();
5553 FunctionArgList Args;
5554 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5555 ImplicitParamKind::Other);
5556 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5557 ImplicitParamKind::Other);
5558 Args.emplace_back(&Param);
5559 Args.emplace_back(&ParamOrig);
5560 const auto &FnInfo =
5561 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5562 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5563 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5564 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5565 Name, &CGM.getModule());
5566 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5567 Fn->setDoesNotRecurse();
5568 CodeGenFunction CGF(CGM);
5569 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5570 QualType PrivateType = RCG.getPrivateType(N);
5571 Address PrivateAddr = CGF.EmitLoadOfPointer(
5572 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5573 C.getPointerType(PrivateType)->castAs<PointerType>());
5574 llvm::Value *Size = nullptr;
5575 // If the size of the reduction item is non-constant, load it from global
5576 // threadprivate variable.
5577 if (RCG.getSizes(N).second) {
5578 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5579 CGF, CGM.getContext().getSizeType(),
5580 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5581 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5582 CGM.getContext().getSizeType(), Loc);
5583 }
5584 RCG.emitAggregateType(CGF, N, Size);
5585 Address OrigAddr = Address::invalid();
5586 // If the initializer uses the initializer from a declare reduction construct,
5587 // emit a pointer to the address of the original reduction item (required by
5588 // the reduction initializer).
5589 if (RCG.usesReductionInitializer(N)) {
5590 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5591 OrigAddr = CGF.EmitLoadOfPointer(
5592 SharedAddr,
5593 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5594 }
5595 // Emit the initializer:
5596 // %0 = bitcast void* %arg to <type>*
5597 // store <type> <init>, <type>* %0
5598 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5599 [](CodeGenFunction &) { return false; });
5600 CGF.FinishFunction();
5601 return Fn;
5602}
5603
5604/// Emits reduction combiner function:
5605/// \code
5606/// void @.red_comb(void* %arg0, void* %arg1) {
5607/// %lhs = bitcast void* %arg0 to <type>*
5608/// %rhs = bitcast void* %arg1 to <type>*
5609/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5610/// store <type> %2, <type>* %lhs
5611/// ret void
5612/// }
5613/// \endcode
5614static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5615 SourceLocation Loc,
5616 ReductionCodeGen &RCG, unsigned N,
5617 const Expr *ReductionOp,
5618 const Expr *LHS, const Expr *RHS,
5619 const Expr *PrivateRef) {
5620 ASTContext &C = CGM.getContext();
5621 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5622 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5623 FunctionArgList Args;
5624 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5625 C.VoidPtrTy, ImplicitParamKind::Other);
5626 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5627 ImplicitParamKind::Other);
5628 Args.emplace_back(&ParamInOut);
5629 Args.emplace_back(&ParamIn);
5630 const auto &FnInfo =
5631 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5632 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5633 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5634 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5635 Name, &CGM.getModule());
5636 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5637 Fn->setDoesNotRecurse();
5638 CodeGenFunction CGF(CGM);
5639 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5640 llvm::Value *Size = nullptr;
5641 // If the size of the reduction item is non-constant, load it from global
5642 // threadprivate variable.
5643 if (RCG.getSizes(N).second) {
5644 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5645 CGF, CGM.getContext().getSizeType(),
5646 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5647 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5648 CGM.getContext().getSizeType(), Loc);
5649 }
5650 RCG.emitAggregateType(CGF, N, Size);
5651 // Remap lhs and rhs variables to the addresses of the function arguments.
5652 // %lhs = bitcast void* %arg0 to <type>*
5653 // %rhs = bitcast void* %arg1 to <type>*
5654 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5655 PrivateScope.addPrivate(
5656 LHSVD,
5657 // Pull out the pointer to the variable.
5658 CGF.EmitLoadOfPointer(
5659 CGF.GetAddrOfLocalVar(&ParamInOut)
5660 .withElementType(CGF.Builder.getPtrTy(0)),
5661 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5662 PrivateScope.addPrivate(
5663 RHSVD,
5664 // Pull out the pointer to the variable.
5665 CGF.EmitLoadOfPointer(
5666 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5667 CGF.Builder.getPtrTy(0)),
5668 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5669 PrivateScope.Privatize();
5670 // Emit the combiner body:
5671 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5672 // store <type> %2, <type>* %lhs
5673 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5674 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5675 cast<DeclRefExpr>(RHS));
5676 CGF.FinishFunction();
5677 return Fn;
5678}
5679
5680/// Emits reduction finalizer function:
5681/// \code
5682/// void @.red_fini(void* %arg) {
5683/// %0 = bitcast void* %arg to <type>*
5684/// <destroy>(<type>* %0)
5685/// ret void
5686/// }
5687/// \endcode
5688static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5689 SourceLocation Loc,
5690 ReductionCodeGen &RCG, unsigned N) {
5691 if (!RCG.needCleanups(N))
5692 return nullptr;
5693 ASTContext &C = CGM.getContext();
5694 FunctionArgList Args;
5695 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5696 ImplicitParamKind::Other);
5697 Args.emplace_back(&Param);
5698 const auto &FnInfo =
5699 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5700 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5701 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5702 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5703 Name, &CGM.getModule());
5704 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5705 Fn->setDoesNotRecurse();
5706 CodeGenFunction CGF(CGM);
5707 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5708 Address PrivateAddr = CGF.EmitLoadOfPointer(
5709 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5710 llvm::Value *Size = nullptr;
5711 // If the size of the reduction item is non-constant, load it from global
5712 // threadprivate variable.
5713 if (RCG.getSizes(N).second) {
5714 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5715 CGF, CGM.getContext().getSizeType(),
5716 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5717 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5718 CGM.getContext().getSizeType(), Loc);
5719 }
5720 RCG.emitAggregateType(CGF, N, Size);
5721 // Emit the finalizer body:
5722 // <destroy>(<type>* %0)
5723 RCG.emitCleanups(CGF, N, PrivateAddr);
5724 CGF.FinishFunction(Loc);
5725 return Fn;
5726}
5727
5728 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5729 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5730 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5731 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5732 return nullptr;
5733
5734 // Build typedef struct:
5735 // kmp_taskred_input {
5736 // void *reduce_shar; // shared reduction item
5737 // void *reduce_orig; // original reduction item used for initialization
5738 // size_t reduce_size; // size of data item
5739 // void *reduce_init; // data initialization routine
5740 // void *reduce_fini; // data finalization routine
5741 // void *reduce_comb; // data combiner routine
5742 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5743 // } kmp_taskred_input_t;
5744 ASTContext &C = CGM.getContext();
5745 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5746 RD->startDefinition();
5747 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5748 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5749 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5750 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5751 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5752 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5753 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5754 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5755 RD->completeDefinition();
5756 CanQualType RDType = C.getCanonicalTagType(RD);
5757 unsigned Size = Data.ReductionVars.size();
5758 llvm::APInt ArraySize(/*numBits=*/64, Size);
5759 QualType ArrayRDType =
5760 C.getConstantArrayType(RDType, ArraySize, nullptr,
5761 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5762 // kmp_task_red_input_t .rd_input.[Size];
5763 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5764 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5765 Data.ReductionCopies, Data.ReductionOps);
5766 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5767 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5768 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5769 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5770 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5771 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5772 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5773 ".rd_input.gep.");
5774 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5775 // ElemLVal.reduce_shar = &Shareds[Cnt];
5776 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5777 RCG.emitSharedOrigLValue(CGF, Cnt);
5778 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5779 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5780 // ElemLVal.reduce_orig = &Origs[Cnt];
5781 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5782 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5783 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5784 RCG.emitAggregateType(CGF, Cnt);
5785 llvm::Value *SizeValInChars;
5786 llvm::Value *SizeVal;
5787 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5788 // We use delayed creation/initialization for VLAs and array sections. It is
5789 // required because the runtime does not provide a way to pass the sizes of
5790 // VLAs/array sections to the initializer/combiner/finalizer functions.
5791 // Instead, threadprivate global variables are used to store these values and
5792 // make them available to those functions.
5793 bool DelayedCreation = !!SizeVal;
5794 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5795 /*isSigned=*/false);
5796 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5797 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5798 // ElemLVal.reduce_init = init;
5799 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5800 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5801 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5802 // ElemLVal.reduce_fini = fini;
5803 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5804 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5805 llvm::Value *FiniAddr =
5806 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5807 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5808 // ElemLVal.reduce_comb = comb;
5809 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5810 llvm::Value *CombAddr = emitReduceCombFunction(
5811 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5812 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5813 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5814 // ElemLVal.flags = 0;
5815 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5816 if (DelayedCreation) {
5817 CGF.EmitStoreOfScalar(
5818 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5819 FlagsLVal);
5820 } else
5821 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5822 }
5823 if (Data.IsReductionWithTaskMod) {
5824 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5825 // is_ws, int num, void *data);
5826 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5827 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5828 CGM.IntTy, /*isSigned=*/true);
5829 llvm::Value *Args[] = {
5830 IdentTLoc, GTid,
5831 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5832 /*isSigned=*/true),
5833 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5834 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5835 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5836 return CGF.EmitRuntimeCall(
5837 OMPBuilder.getOrCreateRuntimeFunction(
5838 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5839 Args);
5840 }
5841 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5842 llvm::Value *Args[] = {
5843 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5844 /*isSigned=*/true),
5845 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5846 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5847 CGM.VoidPtrTy)};
5848 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5849 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5850 Args);
5851}
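// Illustrative sketch (assumed user code): the kmp_taskred_input_t array
// built above backs task reductions such as
//
//   #pragma omp taskgroup task_reduction(+: sum)
//   {
//   #pragma omp task in_reduction(+: sum)
//     sum += f();
//   }
//
// Each element records the shared/original addresses plus the generated
// .red_init, .red_comb and .red_fini helpers emitted earlier in this file.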
5852
5853 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5854 SourceLocation Loc,
5855 bool IsWorksharingReduction) {
5856 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5857 // is_ws, int num, void *data);
5858 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5859 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5860 CGM.IntTy, /*isSigned=*/true);
5861 llvm::Value *Args[] = {IdentTLoc, GTid,
5862 llvm::ConstantInt::get(CGM.IntTy,
5863 IsWorksharingReduction ? 1 : 0,
5864 /*isSigned=*/true)};
5865 (void)CGF.EmitRuntimeCall(
5866 OMPBuilder.getOrCreateRuntimeFunction(
5867 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5868 Args);
5869}
5870
5871 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5872 SourceLocation Loc,
5873 ReductionCodeGen &RCG,
5874 unsigned N) {
5875 auto Sizes = RCG.getSizes(N);
5876 // Emit the threadprivate global variable if the size is non-constant
5877 // (Sizes.second != nullptr).
5878 if (Sizes.second) {
5879 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5880 /*isSigned=*/false);
5881 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5882 CGF, CGM.getContext().getSizeType(),
5883 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5884 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5885 }
5886}
5887
5888 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5889 SourceLocation Loc,
5890 llvm::Value *ReductionsPtr,
5891 LValue SharedLVal) {
5892 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5893 // *d);
5894 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5895 CGM.IntTy,
5896 /*isSigned=*/true),
5897 ReductionsPtr,
5898 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5899 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5900 return Address(
5901 CGF.EmitRuntimeCall(
5902 OMPBuilder.getOrCreateRuntimeFunction(
5903 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5904 Args),
5905 CGF.Int8Ty, SharedLVal.getAlignment());
5906}
5907
5908 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5909 const OMPTaskDataTy &Data) {
5910 if (!CGF.HaveInsertPoint())
5911 return;
5912
5913 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5914 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5915 OMPBuilder.createTaskwait(CGF.Builder);
5916 } else {
5917 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5918 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5919 auto &M = CGM.getModule();
5920 Address DependenciesArray = Address::invalid();
5921 llvm::Value *NumOfElements;
5922 std::tie(NumOfElements, DependenciesArray) =
5923 emitDependClause(CGF, Data.Dependences, Loc);
5924 if (!Data.Dependences.empty()) {
5925 llvm::Value *DepWaitTaskArgs[7];
5926 DepWaitTaskArgs[0] = UpLoc;
5927 DepWaitTaskArgs[1] = ThreadID;
5928 DepWaitTaskArgs[2] = NumOfElements;
5929 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5930 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5931 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5932 DepWaitTaskArgs[6] =
5933 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5934
5935 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5936
5937 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5938 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5939 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5940 // kmp_int32 has_no_wait); if dependence info is specified.
5941 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5942 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5943 DepWaitTaskArgs);
5944
5945 } else {
5946
5947 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5948 // global_tid);
5949 llvm::Value *Args[] = {UpLoc, ThreadID};
5950 // Ignore return result until untied tasks are supported.
5951 CGF.EmitRuntimeCall(
5952 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5953 Args);
5954 }
5955 }
5956
5957 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5958 Region->emitUntiedSwitch(CGF);
5959}
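// Illustrative example: a bare '#pragma omp taskwait' lowers to
//   call i32 @__kmpc_omp_taskwait(ptr @loc, i32 %gtid)
// whereas '#pragma omp taskwait depend(in : a)' takes the dependence path
// above and calls __kmpc_omp_taskwait_deps_51 with the emitted dependence
// array; when the OpenMPIRBuilder is enabled and there are no dependences,
// the call is produced by OMPBuilder.createTaskwait() instead.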
5960
5961void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5962 OpenMPDirectiveKind InnerKind,
5963 const RegionCodeGenTy &CodeGen,
5964 bool HasCancel) {
5965 if (!CGF.HaveInsertPoint())
5966 return;
5967 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5968 InnerKind != OMPD_critical &&
5969 InnerKind != OMPD_master &&
5970 InnerKind != OMPD_masked);
5971 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5972}
5973
5974namespace {
5975enum RTCancelKind {
5976 CancelNoreq = 0,
5977 CancelParallel = 1,
5978 CancelLoop = 2,
5979 CancelSections = 3,
5980 CancelTaskgroup = 4
5981};
5982} // anonymous namespace
5983
5984static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5985 RTCancelKind CancelKind = CancelNoreq;
5986 if (CancelRegion == OMPD_parallel)
5987 CancelKind = CancelParallel;
5988 else if (CancelRegion == OMPD_for)
5989 CancelKind = CancelLoop;
5990 else if (CancelRegion == OMPD_sections)
5991 CancelKind = CancelSections;
5992 else {
5993 assert(CancelRegion == OMPD_taskgroup);
5994 CancelKind = CancelTaskgroup;
5995 }
5996 return CancelKind;
5997}
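// For example, '#pragma omp cancel for' maps to CancelLoop (2) and
// '#pragma omp cancel taskgroup' to CancelTaskgroup (4); the resulting value
// is passed as the cncl_kind argument of __kmpc_cancel and
// __kmpc_cancellationpoint below.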
5998
5999void CGOpenMPRuntime::emitCancellationPointCall(
6000 CodeGenFunction &CGF, SourceLocation Loc,
6001 OpenMPDirectiveKind CancelRegion) {
6002 if (!CGF.HaveInsertPoint())
6003 return;
6004 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6005 // global_tid, kmp_int32 cncl_kind);
6006 if (auto *OMPRegionInfo =
6007 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6008 // For 'cancellation point taskgroup', the task region info may not have a
6009 // cancel. This may instead happen in another adjacent task.
6010 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6011 llvm::Value *Args[] = {
6012 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6013 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6014 // Ignore return result until untied tasks are supported.
6015 llvm::Value *Result = CGF.EmitRuntimeCall(
6016 OMPBuilder.getOrCreateRuntimeFunction(
6017 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6018 Args);
6019 // if (__kmpc_cancellationpoint()) {
6020 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6021 // exit from construct;
6022 // }
6023 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6024 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6025 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6026 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6027 CGF.EmitBlock(ExitBB);
6028 if (CancelRegion == OMPD_parallel)
6029 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6030 // exit from construct;
6031 CodeGenFunction::JumpDest CancelDest =
6032 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6033 CGF.EmitBranchThroughCleanup(CancelDest);
6034 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6035 }
6036 }
6037}
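// Sketch of the emitted control flow (illustrative):
//   %res = call i32 @__kmpc_cancellationpoint(ptr @loc, i32 %gtid, i32 %kind)
//   %cmp = icmp ne i32 %res, 0
//   br i1 %cmp, label %.cancel.exit, label %.cancel.continue
// where .cancel.exit runs a barrier for 'parallel' cancellation and then
// branches through the cleanups to the enclosing region's cancel
// destination.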
6038
6039void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6040 const Expr *IfCond,
6041 OpenMPDirectiveKind CancelRegion) {
6042 if (!CGF.HaveInsertPoint())
6043 return;
6044 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6045 // kmp_int32 cncl_kind);
6046 auto &M = CGM.getModule();
6047 if (auto *OMPRegionInfo =
6048 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6049 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6050 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6051 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6052 llvm::Value *Args[] = {
6053 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6054 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6055 // Ignore return result until untied tasks are supported.
6056 llvm::Value *Result = CGF.EmitRuntimeCall(
6057 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6058 // if (__kmpc_cancel()) {
6059 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6060 // exit from construct;
6061 // }
6062 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6063 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6064 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6065 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6066 CGF.EmitBlock(ExitBB);
6067 if (CancelRegion == OMPD_parallel)
6068 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6069 // exit from construct;
6070 CodeGenFunction::JumpDest CancelDest =
6071 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6072 CGF.EmitBranchThroughCleanup(CancelDest);
6073 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6074 };
6075 if (IfCond) {
6076 emitIfClause(CGF, IfCond, ThenGen,
6077 [](CodeGenFunction &, PrePostActionTy &) {});
6078 } else {
6079 RegionCodeGenTy ThenRCG(ThenGen);
6080 ThenRCG(CGF);
6081 }
6082 }
6083}
6084
6085namespace {
6086/// Cleanup action for uses_allocators support.
6087class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6088 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6089
6090public:
6091 OMPUsesAllocatorsActionTy(
6092 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6093 : Allocators(Allocators) {}
6094 void Enter(CodeGenFunction &CGF) override {
6095 if (!CGF.HaveInsertPoint())
6096 return;
6097 for (const auto &AllocatorData : Allocators) {
6098 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6099 CGF, AllocatorData.first, AllocatorData.second);
6100 }
6101 }
6102 void Exit(CodeGenFunction &CGF) override {
6103 if (!CGF.HaveInsertPoint())
6104 return;
6105 for (const auto &AllocatorData : Allocators) {
6106 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6107 AllocatorData.first);
6108 }
6109 }
6110};
6111} // namespace
6112
6113void CGOpenMPRuntime::emitTargetOutlinedFunction(
6114 const OMPExecutableDirective &D, StringRef ParentName,
6115 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6116 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6117 assert(!ParentName.empty() && "Invalid target entry parent name!");
6118 HasEmittedTargetRegion = true;
6119 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6120 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6121 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6122 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6123 if (!D.AllocatorTraits)
6124 continue;
6125 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6126 }
6127 }
6128 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6129 CodeGen.setAction(UsesAllocatorAction);
6130 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6131 IsOffloadEntry, CodeGen);
6132}
6133
6134void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6135 const Expr *Allocator,
6136 const Expr *AllocatorTraits) {
6137 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6138 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6139 // Use default memspace handle.
6140 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6141 llvm::Value *NumTraits = llvm::ConstantInt::get(
6142 CGF.IntTy, cast<ConstantArrayType>(
6143 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6144 ->getSize()
6145 .getLimitedValue());
6146 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6147 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6148 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6149 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6150 AllocatorTraitsLVal.getBaseInfo(),
6151 AllocatorTraitsLVal.getTBAAInfo());
6152 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6153
6154 llvm::Value *AllocatorVal =
6155 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6156 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6157 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6158 // Store to allocator.
6159 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
6160 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6161 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6162 AllocatorVal =
6163 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6164 Allocator->getType(), Allocator->getExprLoc());
6165 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6166}
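// Illustrative example: for
//   omp_alloctrait_t traits[1] = {{omp_atk_alignment, 64}};
//   omp_allocator_handle_t my_alloc;
//   #pragma omp target uses_allocators(my_alloc(traits))
// this emits, on entry to the region,
//   my_alloc = __kmpc_init_allocator(gtid, /*memspace=*/nullptr,
//                                    /*ntraits=*/1, traits);
// with the matching __kmpc_destroy_allocator call emitted on region exit by
// emitUsesAllocatorsFini() below.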
6167
6168void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6169 const Expr *Allocator) {
6170 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6171 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6172 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6173 llvm::Value *AllocatorVal =
6174 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6175 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6176 CGF.getContext().VoidPtrTy,
6177 Allocator->getExprLoc());
6178 (void)CGF.EmitRuntimeCall(
6179 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6180 OMPRTL___kmpc_destroy_allocator),
6181 {ThreadId, AllocatorVal});
6182}
6183
6184void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6185 const OMPExecutableDirective &D, CodeGenFunction &CGF,
6186 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6187 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6188 "invalid default attrs structure");
6189 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6190 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6191
6192 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6193 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6194 /*UpperBoundOnly=*/true);
6195
6196 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6197 for (auto *A : C->getAttrs()) {
6198 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6199 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6200 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6201 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6202 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6203 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6204 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6205 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6206 &AttrMaxThreadsVal);
6207 else
6208 continue;
6209
6210 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6211 if (AttrMaxThreadsVal > 0)
6212 MaxThreadsVal = MaxThreadsVal > 0
6213 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6214 : AttrMaxThreadsVal;
6215 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6216 if (AttrMaxBlocksVal > 0)
6217 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6218 : AttrMaxBlocksVal;
6219 }
6220 }
6221}
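// Illustrative example: attaching a CUDA launch-bounds attribute to a target
// region via something like
//   #pragma omp target teams ompx_attribute(__attribute__((launch_bounds(128, 2))))
// makes AttrMaxThreadsVal = 128 and AttrMinBlocksVal = 2 above, so the
// kernel's MaxThreads is clamped to min(current, 128) and MinTeams is raised
// to at least 2; AMDGPU flat work-group sizes feed in the same way.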
6222
6223void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6224 const OMPExecutableDirective &D, StringRef ParentName,
6225 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6226 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6227
6228 llvm::TargetRegionEntryInfo EntryInfo =
6229 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6230
6231 CodeGenFunction CGF(CGM, true);
6232 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6233 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6234 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6235
6236 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6237 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6238 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6239 };
6240
6241 cantFail(OMPBuilder.emitTargetRegionFunction(
6242 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6243 OutlinedFnID));
6244
6245 if (!OutlinedFn)
6246 return;
6247
6248 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6249
6250 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6251 for (auto *A : C->getAttrs()) {
6252 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6253 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6254 }
6255 }
6256}
6257
6258/// Checks if the expression is constant or does not have non-trivial function
6259/// calls.
6260static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6261 // We can skip constant expressions.
6262 // We can skip expressions with trivial calls or simple expressions.
6263 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6264 !E->hasNonTrivialCall(Ctx)) &&
6265 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6266}
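// For instance, '42 + x' (call-free, no side effects) is trivial, while
// 'foo()' (a potentially non-trivial call) or '++x' (a side effect) is not;
// only trivial expressions may be skipped when searching for the single
// meaningful child statement below.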
6267
6268const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6269 const Stmt *Body) {
6270 const Stmt *Child = Body->IgnoreContainers();
6271 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6272 Child = nullptr;
6273 for (const Stmt *S : C->body()) {
6274 if (const auto *E = dyn_cast<Expr>(S)) {
6275 if (isTrivial(Ctx, E))
6276 continue;
6277 }
6278 // Some of the statements can be ignored.
6279 if (isa<NullStmt>(S) || isa<OMPBarrierDirective>(S) ||
6280 isa<OMPFlushDirective>(S) || isa<OMPTaskyieldDirective>(S))
6281 continue;
6282 // Analyze declarations.
6283 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6284 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6285 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6286 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6287 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6288 isa<UsingDirectiveDecl>(D) ||
6289 isa<OMPDeclareReductionDecl>(D) ||
6290 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6291 return true;
6292 const auto *VD = dyn_cast<VarDecl>(D);
6293 if (!VD)
6294 return false;
6295 return VD->hasGlobalStorage() || !VD->isUsed();
6296 }))
6297 continue;
6298 }
6299 // Found multiple children - cannot get the one child only.
6300 if (Child)
6301 return nullptr;
6302 Child = S;
6303 }
6304 if (Child)
6305 Child = Child->IgnoreContainers();
6306 }
6307 return Child;
6308}
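// Illustrative example: in
//   #pragma omp target
//   { ; int unused; { #pragma omp teams ... } }
// the null statement and the unused local declaration are skipped, so the
// teams directive is returned as the single child; the num_teams/num_threads
// analyses below rely on this to look through such wrappers.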
6309
6310const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6311 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6312 int32_t &MaxTeamsVal) {
6313
6314 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6315 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6316 "Expected target-based executable directive.");
6317 switch (DirectiveKind) {
6318 case OMPD_target: {
6319 const auto *CS = D.getInnermostCapturedStmt();
6320 const auto *Body =
6321 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6322 const Stmt *ChildStmt =
6323 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6324 if (const auto *NestedDir =
6325 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6326 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6327 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6328 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6329 ->getNumTeams()
6330 .front();
6331 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6332 if (auto Constant =
6333 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6334 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6335 return NumTeams;
6336 }
6337 MinTeamsVal = MaxTeamsVal = 0;
6338 return nullptr;
6339 }
6340 MinTeamsVal = MaxTeamsVal = 1;
6341 return nullptr;
6342 }
6343 // A value of -1 signals that no teams region needs to be emitted.
6344 MinTeamsVal = MaxTeamsVal = -1;
6345 return nullptr;
6346 }
6347 case OMPD_target_teams_loop:
6348 case OMPD_target_teams:
6349 case OMPD_target_teams_distribute:
6350 case OMPD_target_teams_distribute_simd:
6351 case OMPD_target_teams_distribute_parallel_for:
6352 case OMPD_target_teams_distribute_parallel_for_simd: {
6353 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6354 const Expr *NumTeams =
6355 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6356 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6357 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6358 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6359 return NumTeams;
6360 }
6361 MinTeamsVal = MaxTeamsVal = 0;
6362 return nullptr;
6363 }
6364 case OMPD_target_parallel:
6365 case OMPD_target_parallel_for:
6366 case OMPD_target_parallel_for_simd:
6367 case OMPD_target_parallel_loop:
6368 case OMPD_target_simd:
6369 MinTeamsVal = MaxTeamsVal = 1;
6370 return nullptr;
6371 case OMPD_parallel:
6372 case OMPD_for:
6373 case OMPD_parallel_for:
6374 case OMPD_parallel_loop:
6375 case OMPD_parallel_master:
6376 case OMPD_parallel_sections:
6377 case OMPD_for_simd:
6378 case OMPD_parallel_for_simd:
6379 case OMPD_cancel:
6380 case OMPD_cancellation_point:
6381 case OMPD_ordered:
6382 case OMPD_threadprivate:
6383 case OMPD_allocate:
6384 case OMPD_task:
6385 case OMPD_simd:
6386 case OMPD_tile:
6387 case OMPD_unroll:
6388 case OMPD_sections:
6389 case OMPD_section:
6390 case OMPD_single:
6391 case OMPD_master:
6392 case OMPD_critical:
6393 case OMPD_taskyield:
6394 case OMPD_barrier:
6395 case OMPD_taskwait:
6396 case OMPD_taskgroup:
6397 case OMPD_atomic:
6398 case OMPD_flush:
6399 case OMPD_depobj:
6400 case OMPD_scan:
6401 case OMPD_teams:
6402 case OMPD_target_data:
6403 case OMPD_target_exit_data:
6404 case OMPD_target_enter_data:
6405 case OMPD_distribute:
6406 case OMPD_distribute_simd:
6407 case OMPD_distribute_parallel_for:
6408 case OMPD_distribute_parallel_for_simd:
6409 case OMPD_teams_distribute:
6410 case OMPD_teams_distribute_simd:
6411 case OMPD_teams_distribute_parallel_for:
6412 case OMPD_teams_distribute_parallel_for_simd:
6413 case OMPD_target_update:
6414 case OMPD_declare_simd:
6415 case OMPD_declare_variant:
6416 case OMPD_begin_declare_variant:
6417 case OMPD_end_declare_variant:
6418 case OMPD_declare_target:
6419 case OMPD_end_declare_target:
6420 case OMPD_declare_reduction:
6421 case OMPD_declare_mapper:
6422 case OMPD_taskloop:
6423 case OMPD_taskloop_simd:
6424 case OMPD_master_taskloop:
6425 case OMPD_master_taskloop_simd:
6426 case OMPD_parallel_master_taskloop:
6427 case OMPD_parallel_master_taskloop_simd:
6428 case OMPD_requires:
6429 case OMPD_metadirective:
6430 case OMPD_unknown:
6431 break;
6432 default:
6433 break;
6434 }
6435 llvm_unreachable("Unexpected directive kind.");
6436}
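// Illustrative example: for '#pragma omp target teams num_teams(4)' the
// clause expression folds to a constant, so MinTeamsVal = MaxTeamsVal = 4
// and the clause expression is returned; for '#pragma omp target parallel'
// no teams construct can occur, so the bounds collapse to 1 and nullptr is
// returned.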
6437
6438llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6439 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6440 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6441 "Clauses associated with the teams directive expected to be emitted "
6442 "only for the host!");
6443 CGBuilderTy &Bld = CGF.Builder;
6444 int32_t MinNT = -1, MaxNT = -1;
6445 const Expr *NumTeams =
6446 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6447 if (NumTeams != nullptr) {
6448 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6449
6450 switch (DirectiveKind) {
6451 case OMPD_target: {
6452 const auto *CS = D.getInnermostCapturedStmt();
6453 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6454 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6455 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6456 /*IgnoreResultAssign*/ true);
6457 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6458 /*isSigned=*/true);
6459 }
6460 case OMPD_target_teams:
6461 case OMPD_target_teams_distribute:
6462 case OMPD_target_teams_distribute_simd:
6463 case OMPD_target_teams_distribute_parallel_for:
6464 case OMPD_target_teams_distribute_parallel_for_simd: {
6465 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6466 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6467 /*IgnoreResultAssign*/ true);
6468 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6469 /*isSigned=*/true);
6470 }
6471 default:
6472 break;
6473 }
6474 }
6475
6476 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6477 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6478}
6479
6480 /// Check for a num threads constant value (stored in \p UpperBound), or an
6481 /// expression (stored in \p E). If the value is conditional (via an if-clause),
6482 /// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
6483 /// corresponding expression evaluation is not performed.
6484static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6485 const Expr **E, int32_t &UpperBound,
6486 bool UpperBoundOnly, llvm::Value **CondVal) {
6487 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6488 CGF.getContext(), CS->getCapturedStmt());
6489 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6490 if (!Dir)
6491 return;
6492
6493 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6494 // Handle the if clause. If present, the number of threads is calculated
6495 // as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6496 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6497 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6498 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6499 const OMPIfClause *IfClause = nullptr;
6500 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6501 if (C->getNameModifier() == OMPD_unknown ||
6502 C->getNameModifier() == OMPD_parallel) {
6503 IfClause = C;
6504 break;
6505 }
6506 }
6507 if (IfClause) {
6508 const Expr *CondExpr = IfClause->getCondition();
6509 bool Result;
6510 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6511 if (!Result) {
6512 UpperBound = 1;
6513 return;
6514 }
6515 } else {
6516 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6517 if (const auto *PreInit =
6518 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6519 for (const auto *I : PreInit->decls()) {
6520 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6521 CGF.EmitVarDecl(cast<VarDecl>(*I));
6522 } else {
6523 CodeGenFunction::AutoVarEmission Emission =
6524 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6525 CGF.EmitAutoVarCleanups(Emission);
6526 }
6527 }
6528 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6529 }
6530 }
6531 }
6532 }
6533 // Check the value of the num_threads clause iff the if clause was not
6534 // specified or does not evaluate to false.
6535 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6536 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6537 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6538 const auto *NumThreadsClause =
6539 Dir->getSingleClause<OMPNumThreadsClause>();
6540 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6541 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6542 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6543 UpperBound =
6544 UpperBound
6545 ? Constant->getZExtValue()
6546 : std::min(UpperBound,
6547 static_cast<int32_t>(Constant->getZExtValue()));
6548 // If we haven't found an upper bound, remember we saw a thread limiting
6549 // clause.
6550 if (UpperBound == -1)
6551 UpperBound = 0;
6552 if (!E)
6553 return;
6554 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6555 if (const auto *PreInit =
6556 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6557 for (const auto *I : PreInit->decls()) {
6558 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6559 CGF.EmitVarDecl(cast<VarDecl>(*I));
6560 } else {
6561 CodeGenFunction::AutoVarEmission Emission =
6562 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6563 CGF.EmitAutoVarCleanups(Emission);
6564 }
6565 }
6566 }
6567 *E = NTExpr;
6568 }
6569 return;
6570 }
6571 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6572 UpperBound = 1;
6573}
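// Illustrative example: for a captured
//   #pragma omp parallel if(c) num_threads(8)
// a constant-false 'c' pins UpperBound to 1; otherwise UpperBound becomes 8
// and, if requested, *CondVal receives the emitted value of 'c' so the
// caller can build the select described below.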
6574
6575const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6576 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6577 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6578 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6579 "Clauses associated with the teams directive expected to be emitted "
6580 "only for the host!");
6581 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6582 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6583 "Expected target-based executable directive.");
6584
6585 const Expr *NT = nullptr;
6586 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6587
6588 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6589 if (E->isIntegerConstantExpr(CGF.getContext())) {
6590 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6591 UpperBound = UpperBound ? Constant->getZExtValue()
6592 : std::min(UpperBound,
6593 int32_t(Constant->getZExtValue()));
6594 }
6595 // If we haven't found an upper bound, remember we saw a thread limiting
6596 // clause.
6597 if (UpperBound == -1)
6598 UpperBound = 0;
6599 if (EPtr)
6600 *EPtr = E;
6601 };
6602
6603 auto ReturnSequential = [&]() {
6604 UpperBound = 1;
6605 return NT;
6606 };
6607
6608 switch (DirectiveKind) {
6609 case OMPD_target: {
6610 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6611 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6612 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6613 CGF.getContext(), CS->getCapturedStmt());
6614 // TODO: The standard is not clear how to resolve two thread limit clauses,
6615 // let's pick the teams one if it's present, otherwise the target one.
6616 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6617 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6618 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6619 ThreadLimitClause = TLC;
6620 if (ThreadLimitExpr) {
6621 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6622 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6623 CodeGenFunction::LexicalScope Scope(
6624 CGF,
6625 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6626 if (const auto *PreInit =
6627 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6628 for (const auto *I : PreInit->decls()) {
6629 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6630 CGF.EmitVarDecl(cast<VarDecl>(*I));
6631 } else {
6632 CodeGenFunction::AutoVarEmission Emission =
6633 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6634 CGF.EmitAutoVarCleanups(Emission);
6635 }
6636 }
6637 }
6638 }
6639 }
6640 }
6641 if (ThreadLimitClause)
6642 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6643 ThreadLimitExpr);
6644 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6645 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6646 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6647 CS = Dir->getInnermostCapturedStmt();
6648 Child = CGOpenMPRuntime::getSingleCompoundChild(
6649 CGF.getContext(), CS->getCapturedStmt());
6650 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6651 }
6652 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6653 CS = Dir->getInnermostCapturedStmt();
6654 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6655 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6656 return ReturnSequential();
6657 }
6658 return NT;
6659 }
6660 case OMPD_target_teams: {
6661 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6662 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6663 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6664 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6665 ThreadLimitExpr);
6666 }
6667 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6668 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6669 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6670 CGF.getContext(), CS->getCapturedStmt());
6671 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6672 if (Dir->getDirectiveKind() == OMPD_distribute) {
6673 CS = Dir->getInnermostCapturedStmt();
6674 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6675 }
6676 }
6677 return NT;
6678 }
6679 case OMPD_target_teams_distribute:
6680 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6681 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6682 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6683 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6684 ThreadLimitExpr);
6685 }
6686 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6687 UpperBoundOnly, CondVal);
6688 return NT;
6689 case OMPD_target_teams_loop:
6690 case OMPD_target_parallel_loop:
6691 case OMPD_target_parallel:
6692 case OMPD_target_parallel_for:
6693 case OMPD_target_parallel_for_simd:
6694 case OMPD_target_teams_distribute_parallel_for:
6695 case OMPD_target_teams_distribute_parallel_for_simd: {
6696 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6697 const OMPIfClause *IfClause = nullptr;
6698 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6699 if (C->getNameModifier() == OMPD_unknown ||
6700 C->getNameModifier() == OMPD_parallel) {
6701 IfClause = C;
6702 break;
6703 }
6704 }
6705 if (IfClause) {
6706 const Expr *Cond = IfClause->getCondition();
6707 bool Result;
6708 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6709 if (!Result)
6710 return ReturnSequential();
6711 } else {
6712 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6713 *CondVal = CGF.EvaluateExprAsBool(Cond);
6714 }
6715 }
6716 }
6717 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6718 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6719 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6720 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6721 ThreadLimitExpr);
6722 }
6723 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6724 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6725 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6726 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6727 return NumThreadsClause->getNumThreads();
6728 }
6729 return NT;
6730 }
6731 case OMPD_target_teams_distribute_simd:
6732 case OMPD_target_simd:
6733 return ReturnSequential();
6734 default:
6735 break;
6736 }
6737 llvm_unreachable("Unsupported directive kind.");
6738}
6739
6740llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6741 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6742 llvm::Value *NumThreadsVal = nullptr;
6743 llvm::Value *CondVal = nullptr;
6744 llvm::Value *ThreadLimitVal = nullptr;
6745 const Expr *ThreadLimitExpr = nullptr;
6746 int32_t UpperBound = -1;
6747
6748 const Expr *NT = getNumThreadsExprForTargetDirective(
6749 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6750 &ThreadLimitExpr);
6751
6752 // Thread limit expressions are used below, emit them.
6753 if (ThreadLimitExpr) {
6754 ThreadLimitVal =
6755 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6756 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6757 /*isSigned=*/false);
6758 }
6759
6760 // Generate the num threads expression.
6761 if (UpperBound == 1) {
6762 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6763 } else if (NT) {
6764 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6765 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6766 /*isSigned=*/false);
6767 } else if (ThreadLimitVal) {
6768 // If we do not have a num threads value but a thread limit, replace the
6769 // former with the latter. We have already handled the thread limit expression.
6770 NumThreadsVal = ThreadLimitVal;
6771 ThreadLimitVal = nullptr;
6772 } else {
6773 // Default to "0" which means runtime choice.
6774 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6775 NumThreadsVal = CGF.Builder.getInt32(0);
6776 }
6777
6778 // Handle the if clause. If present, the number of threads is calculated
6779 // as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6780 if (CondVal) {
6781 CodeGenFunction::RunCleanupsScope IfScope(CGF);
6782 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6783 CGF.Builder.getInt32(1));
6784 }
6785
6786 // If both the thread limit and the num threads expression were present,
6787 // take the minimum.
6788 if (ThreadLimitVal) {
6789 NumThreadsVal = CGF.Builder.CreateSelect(
6790 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6791 ThreadLimitVal, NumThreadsVal);
6792 }
6793
6794 return NumThreadsVal;
6795}
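// Illustrative result: for
//   #pragma omp target parallel if(c) num_threads(n) thread_limit(m)
// the emitted value is roughly
//   %nt  = select i1 %c, i32 %n, i32 1
//   %res = select i1 (icmp ult i32 %m, i32 %nt), i32 %m, i32 %nt
// i.e. a false if-clause forces one thread, and the thread limit is applied
// as an unsigned minimum.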
6796
6797namespace {
6798LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6799
6800// Utility to handle information from clauses associated with a given
6801// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6802// It provides a convenient interface to obtain the information and generate
6803// code for that information.
6804class MappableExprsHandler {
6805public:
6806 /// Custom comparator for attach-pointer expressions that compares them by
6807 /// complexity (i.e. their component-depth) first, then by the order in which
6808 /// they were computed by collectAttachPtrExprInfo(), if they are semantically
6809 /// different.
6810 struct AttachPtrExprComparator {
6811 const MappableExprsHandler &Handler;
6812 // Cache of previous equality comparison results.
6813 mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
6814 CachedEqualityComparisons;
6815
6816 AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
6817 AttachPtrExprComparator() = delete;
6818
6819 // Return true iff LHS is "less than" RHS.
6820 bool operator()(const Expr *LHS, const Expr *RHS) const {
6821 if (LHS == RHS)
6822 return false;
6823
6824 // First, compare by complexity (depth)
6825 const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
6826 const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
6827
6828 std::optional<size_t> DepthLHS =
6829 (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
6830 : std::nullopt;
6831 std::optional<size_t> DepthRHS =
6832 (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
6833 : std::nullopt;
6834
6835 // std::nullopt (no attach pointer) has lowest complexity
6836 if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
6837 // Both have same complexity, now check semantic equality
6838 if (areEqual(LHS, RHS))
6839 return false;
6840 // Different semantically, compare by computation order
6841 return wasComputedBefore(LHS, RHS);
6842 }
6843 if (!DepthLHS.has_value())
6844 return true; // LHS has lower complexity
6845 if (!DepthRHS.has_value())
6846 return false; // RHS has lower complexity
6847
6848 // Both have values, compare by depth (lower depth = lower complexity)
6849 if (DepthLHS.value() != DepthRHS.value())
6850 return DepthLHS.value() < DepthRHS.value();
6851
6852 // Same complexity, now check semantic equality
6853 if (areEqual(LHS, RHS))
6854 return false;
6855 // Different semantically, compare by computation order
6856 return wasComputedBefore(LHS, RHS);
6857 }
6858
6859 public:
6860 /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
6861 /// results, if available, otherwise does a recursive semantic comparison.
6862 bool areEqual(const Expr *LHS, const Expr *RHS) const {
6863 // Check cache first for faster lookup
6864 const auto CachedResultIt = CachedEqualityComparisons.find({LHS, RHS});
6865 if (CachedResultIt != CachedEqualityComparisons.end())
6866 return CachedResultIt->second;
6867
6868 bool ComparisonResult = areSemanticallyEqual(LHS, RHS);
6869
6870 // Cache the result for future lookups (both orders since semantic
6871 // equality is commutative)
6872 CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
6873 CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
6874 return ComparisonResult;
6875 }
6876
6877 /// Compare the two attach-ptr expressions by their computation order.
6878 /// Returns true iff LHS was computed before RHS by
6879 /// collectAttachPtrExprInfo().
6880 bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
6881 const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
6882 const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
6883
6884 return OrderLHS < OrderRHS;
6885 }
6886
6887 private:
6888 /// Helper function to compare attach-pointer expressions semantically.
6889 /// This function handles various expression types that can be part of an
6890 /// attach-pointer.
6891 /// TODO: Not urgent, but we should ideally return true when comparing
6892 /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
6893 bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
6894 if (LHS == RHS)
6895 return true;
6896
6897 // If only one is null, they aren't equal
6898 if (!LHS || !RHS)
6899 return false;
6900
6901 ASTContext &Ctx = Handler.CGF.getContext();
6902 // Strip away parentheses and no-op casts to get to the core expression
6903 LHS = LHS->IgnoreParenNoopCasts(Ctx);
6904 RHS = RHS->IgnoreParenNoopCasts(Ctx);
6905
6906 // Direct pointer comparison of the underlying expressions
6907 if (LHS == RHS)
6908 return true;
6909
6910 // Check if the expression classes match
6911 if (LHS->getStmtClass() != RHS->getStmtClass())
6912 return false;
6913
6914 // Handle DeclRefExpr (variable references)
6915 if (const auto *LD = dyn_cast<DeclRefExpr>(LHS)) {
6916 const auto *RD = dyn_cast<DeclRefExpr>(RHS);
6917 if (!RD)
6918 return false;
6919 return LD->getDecl()->getCanonicalDecl() ==
6920 RD->getDecl()->getCanonicalDecl();
6921 }
6922
6923 // Handle ArraySubscriptExpr (array indexing like a[i])
6924 if (const auto *LA = dyn_cast<ArraySubscriptExpr>(LHS)) {
6925 const auto *RA = dyn_cast<ArraySubscriptExpr>(RHS);
6926 if (!RA)
6927 return false;
6928 return areSemanticallyEqual(LA->getBase(), RA->getBase()) &&
6929 areSemanticallyEqual(LA->getIdx(), RA->getIdx());
6930 }
6931
6932 // Handle MemberExpr (member access like s.m or p->m)
6933 if (const auto *LM = dyn_cast<MemberExpr>(LHS)) {
6934 const auto *RM = dyn_cast<MemberExpr>(RHS);
6935 if (!RM)
6936 return false;
6937 if (LM->getMemberDecl()->getCanonicalDecl() !=
6938 RM->getMemberDecl()->getCanonicalDecl())
6939 return false;
6940 return areSemanticallyEqual(LM->getBase(), RM->getBase());
6941 }
6942
6943 // Handle UnaryOperator (unary operations like *p, &x, etc.)
6944 if (const auto *LU = dyn_cast<UnaryOperator>(LHS)) {
6945 const auto *RU = dyn_cast<UnaryOperator>(RHS);
6946 if (!RU)
6947 return false;
6948 if (LU->getOpcode() != RU->getOpcode())
6949 return false;
6950 return areSemanticallyEqual(LU->getSubExpr(), RU->getSubExpr());
6951 }
6952
6953 // Handle BinaryOperator (binary operations like p + offset)
6954 if (const auto *LB = dyn_cast<BinaryOperator>(LHS)) {
6955 const auto *RB = dyn_cast<BinaryOperator>(RHS);
6956 if (!RB)
6957 return false;
6958 if (LB->getOpcode() != RB->getOpcode())
6959 return false;
6960 return areSemanticallyEqual(LB->getLHS(), RB->getLHS()) &&
6961 areSemanticallyEqual(LB->getRHS(), RB->getRHS());
6962 }
6963
6964 // Handle ArraySectionExpr (array sections like a[0:1])
6965 // Attach pointers should not contain array-sections, but currently we
6966 // don't emit an error.
6967 if (const auto *LAS = dyn_cast<ArraySectionExpr>(LHS)) {
6968 const auto *RAS = dyn_cast<ArraySectionExpr>(RHS);
6969 if (!RAS)
6970 return false;
6971 return areSemanticallyEqual(LAS->getBase(), RAS->getBase()) &&
6972 areSemanticallyEqual(LAS->getLowerBound(),
6973 RAS->getLowerBound()) &&
6974 areSemanticallyEqual(LAS->getLength(), RAS->getLength());
6975 }
6976
6977 // Handle CastExpr (explicit casts)
6978 if (const auto *LC = dyn_cast<CastExpr>(LHS)) {
6979 const auto *RC = dyn_cast<CastExpr>(RHS);
6980 if (!RC)
6981 return false;
6982 if (LC->getCastKind() != RC->getCastKind())
6983 return false;
6984 return areSemanticallyEqual(LC->getSubExpr(), RC->getSubExpr());
6985 }
6986
6987 // Handle CXXThisExpr (this pointer)
6988 if (isa<CXXThisExpr>(LHS) && isa<CXXThisExpr>(RHS))
6989 return true;
6990
6991 // Handle IntegerLiteral (integer constants)
6992 if (const auto *LI = dyn_cast<IntegerLiteral>(LHS)) {
6993 const auto *RI = dyn_cast<IntegerLiteral>(RHS);
6994 if (!RI)
6995 return false;
6996 return LI->getValue() == RI->getValue();
6997 }
6998
6999 // Handle CharacterLiteral (character constants)
7000 if (const auto *LC = dyn_cast<CharacterLiteral>(LHS)) {
7001 const auto *RC = dyn_cast<CharacterLiteral>(RHS);
7002 if (!RC)
7003 return false;
7004 return LC->getValue() == RC->getValue();
7005 }
7006
7007 // Handle FloatingLiteral (floating point constants)
7008 if (const auto *LF = dyn_cast<FloatingLiteral>(LHS)) {
7009 const auto *RF = dyn_cast<FloatingLiteral>(RHS);
7010 if (!RF)
7011 return false;
7012 // Use bitwise comparison for floating point literals
7013 return LF->getValue().bitwiseIsEqual(RF->getValue());
7014 }
7015
7016 // Handle StringLiteral (string constants)
7017 if (const auto *LS = dyn_cast<StringLiteral>(LHS)) {
7018 const auto *RS = dyn_cast<StringLiteral>(RHS);
7019 if (!RS)
7020 return false;
7021 return LS->getString() == RS->getString();
7022 }
7023
7024 // Handle CXXNullPtrLiteralExpr (nullptr)
7026 return true;
7027
7028 // Handle CXXBoolLiteralExpr (true/false)
7029 if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(LHS)) {
7030 const auto *RB = dyn_cast<CXXBoolLiteralExpr>(RHS);
7031 if (!RB)
7032 return false;
7033 return LB->getValue() == RB->getValue();
7034 }
7035
7036 // Fallback for other forms - use the existing comparison method
7037 return Expr::isSameComparisonOperand(LHS, RHS);
7038 }
7039 };
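// Illustrative ordering under this comparator: a null attach-pointer (no
// attach pointer at all) sorts first, 'p' sorts before 'ps->p' (smaller
// component depth), and two semantically equal expressions such as two
// occurrences of 's.p' compare as equivalent instead of falling back to
// computation order.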
7040
7041 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7042 static unsigned getFlagMemberOffset() {
7043 unsigned Offset = 0;
7044 for (uint64_t Remain =
7045 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7046 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7047 !(Remain & 1); Remain = Remain >> 1)
7048 Offset++;
7049 return Offset;
7050 }
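// Illustrative: OMP_MAP_MEMBER_OF occupies the high 16 bits of the 64-bit
// map-type flags, so this returns the position of its lowest set bit (48);
// a member position P is then encoded as ((P + 1) << getFlagMemberOffset()).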
7051
7052 /// Class that holds debugging information for a data mapping to be passed to
7053 /// the runtime library.
7054 class MappingExprInfo {
7055 /// The variable declaration used for the data mapping.
7056 const ValueDecl *MapDecl = nullptr;
7057 /// The original expression used in the map clause, or null if there is
7058 /// none.
7059 const Expr *MapExpr = nullptr;
7060
7061 public:
7062 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7063 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7064
7065 const ValueDecl *getMapDecl() const { return MapDecl; }
7066 const Expr *getMapExpr() const { return MapExpr; }
7067 };
7068
7069 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7070 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7071 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7072 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7073 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7074 using MapNonContiguousArrayTy =
7075 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7076 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7077 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7078 using MapData =
7079 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7080 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7081 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7082 using MapDataArrayTy = SmallVector<MapData, 4>;
7083
7084 /// This structure contains combined information generated for mappable
7085 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7086 /// mappers, and non-contiguous information.
7087 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7088 MapExprsArrayTy Exprs;
7089 MapValueDeclsArrayTy Mappers;
7090 MapValueDeclsArrayTy DevicePtrDecls;
7091
7092 /// Append arrays in \a CurInfo.
7093 void append(MapCombinedInfoTy &CurInfo) {
7094 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7095 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
7096 CurInfo.DevicePtrDecls.end());
7097 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7098 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7099 }
7100 };
7101
7102 /// Map between a struct and its lowest & highest elements which have been
7103 /// mapped.
7104 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7105 /// HE(FieldIndex, Pointer)}
7106 struct StructRangeInfoTy {
7107 MapCombinedInfoTy PreliminaryMapData;
7108 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7109 0, Address::invalid()};
7110 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7111 0, Address::invalid()};
7112 Address Base = Address::invalid();
7113 Address LB = Address::invalid();
7114 bool IsArraySection = false;
7115 bool HasCompleteRecord = false;
7116 };
7117
7118 /// A struct to store the attach pointer and pointee information, to be used
7119 /// when emitting an attach entry.
7120 struct AttachInfoTy {
7121 Address AttachPtrAddr = Address::invalid();
7122 Address AttachPteeAddr = Address::invalid();
7123 const ValueDecl *AttachPtrDecl = nullptr;
7124 const Expr *AttachMapExpr = nullptr;
7125
7126 bool isValid() const {
7127 return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
7128 }
7129 };
7130
7131 /// Check if there's any component list where the attach pointer expression
7132 /// matches the given captured variable.
7133 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7134 for (const auto &AttachEntry : AttachPtrExprMap) {
7135 if (AttachEntry.second) {
7136 // Check if the attach pointer expression is a DeclRefExpr that
7137 // references the captured variable
7138 if (const auto *DRE = dyn_cast<DeclRefExpr>(AttachEntry.second))
7139 if (DRE->getDecl() == VD)
7140 return true;
7141 }
7142 }
7143 return false;
7144 }
7145
7146 /// Get the previously-cached attach pointer for a component list, if any.
7147 const Expr *getAttachPtrExpr(
7148 OMPClauseMappableExprCommon::MappableExprComponentListRef Components)
7149 const {
7150 const auto It = AttachPtrExprMap.find(Components);
7151 if (It != AttachPtrExprMap.end())
7152 return It->second;
7153
7154 return nullptr;
7155 }
7156
7157private:
7158 /// Information gathered from a map clause for one component list.
7159 struct MapInfo {
7160 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7161 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7162 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7163 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7164 bool ReturnDevicePointer = false;
7165 bool IsImplicit = false;
7166 const ValueDecl *Mapper = nullptr;
7167 const Expr *VarRef = nullptr;
7168 bool ForDeviceAddr = false;
7169
7170 MapInfo() = default;
7171 MapInfo(
7172 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7173 OpenMPMapClauseKind MapType,
7174 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7175 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7176 bool ReturnDevicePointer, bool IsImplicit,
7177 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7178 bool ForDeviceAddr = false)
7179 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7180 MotionModifiers(MotionModifiers),
7181 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7182 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7183 };
7184
7185 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7186 /// member and there is no map information about it, then emission of that
7187 /// entry is deferred until the whole struct has been processed.
7188 struct DeferredDevicePtrEntryTy {
7189 const Expr *IE = nullptr;
7190 const ValueDecl *VD = nullptr;
7191 bool ForDeviceAddr = false;
7192
7193 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7194 bool ForDeviceAddr)
7195 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7196 };
7197
7198 /// The target directive from where the mappable clauses were extracted. It
7199 /// is either an executable directive or a user-defined mapper directive.
7200 llvm::PointerUnion<const OMPExecutableDirective *,
7201 const OMPDeclareMapperDecl *>
7202 CurDir;
7203
7204 /// Function the directive is being generated for.
7205 CodeGenFunction &CGF;
7206
7207 /// Set of all first private variables in the current directive.
7208 /// bool data is set to true if the variable is implicitly marked as
7209 /// firstprivate, false otherwise.
7210 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7211
7212 /// Map between device pointer declarations and their expression components.
7213 /// The key value for declarations in 'this' is null.
7214 llvm::DenseMap<
7215 const ValueDecl *,
7216 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7217 DevPointersMap;
7218
7219 /// Map between device addr declarations and their expression components.
7220 /// The key value for declarations in 'this' is null.
7221 llvm::DenseMap<
7222 const ValueDecl *,
7223 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7224 HasDevAddrsMap;
7225
7226 /// Map between lambda declarations and their map type.
7227 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7228
7229 /// Map from component lists to their attach pointer expressions.
7230 llvm::DenseMap<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7231 const Expr *>
7232 AttachPtrExprMap;
7233
7234 /// Map from attach pointer expressions to their component depth.
7235 /// The component-depth of `nullptr` (i.e. no attach-ptr) is `std::nullopt`,
7236 /// which is the lowest. This can be used to order attach-ptr expressions
7237 /// with increasing/decreasing depth.
7238 /// TODO: Not urgent, but we should ideally use the number of pointer
7239 /// dereferences in an expr as an indicator of its complexity, instead of the
7240 /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
7241 /// `*(p + 5 + 5)` together.
7242 llvm::DenseMap<const Expr *, std::optional<size_t>>
7243 AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};
7244
7245 /// Map from attach pointer expressions to the order in which they were
7246 /// computed by collectAttachPtrExprInfo().
7247 llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
7248 {nullptr, 0}};
7249
7250 /// An instance of attach-ptr-expr comparator that can be used throughout the
7251 /// lifetime of this handler.
7252 AttachPtrExprComparator AttachPtrComparator;
7253
7254 llvm::Value *getExprTypeSize(const Expr *E) const {
7255 QualType ExprTy = E->getType().getCanonicalType();
7256
7257 // Calculate the size for an array shaping expression.
7258 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7259 llvm::Value *Size =
7260 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7261 for (const Expr *SE : OAE->getDimensions()) {
7262 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7263 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7264 CGF.getContext().getSizeType(),
7265 SE->getExprLoc());
7266 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7267 }
7268 return Size;
7269 }
7270
7271 // Reference types are ignored for mapping purposes.
7272 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7273 ExprTy = RefTy->getPointeeType().getCanonicalType();
7274
7275 // Given that an array section is considered a built-in type, we need to
7276 // do the calculation based on the length of the section instead of relying
7277 // on CGF.getTypeSize(E->getType()).
7278 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
7279 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
7280 OAE->getBase()->IgnoreParenImpCasts())
7281 .getCanonicalType();
7282
7283 // If there is no length associated with the expression and the lower
7284 // bound is not specified either, that means we are using the whole
7285 // length of the base.
7286 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7287 !OAE->getLowerBound())
7288 return CGF.getTypeSize(BaseTy);
7289
7290 llvm::Value *ElemSize;
7291 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7292 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7293 } else {
7294 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7295 assert(ATy && "Expecting array type if not a pointer type.");
7296 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7297 }
7298
7299 // If we don't have a length at this point, that is because we have an
7300 // array section with a single element.
7301 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7302 return ElemSize;
7303
7304 if (const Expr *LenExpr = OAE->getLength()) {
7305 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7306 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7307 CGF.getContext().getSizeType(),
7308 LenExpr->getExprLoc());
7309 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7310 }
7311 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7312 OAE->getLowerBound() && "expected array_section[lb:].");
7313 // Size = sizetype - lb * elemtype;
7314 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7315 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7316 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7317 CGF.getContext().getSizeType(),
7318 OAE->getLowerBound()->getExprLoc());
7319 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7320 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7321 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7322 LengthVal = CGF.Builder.CreateSelect(
7323 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7324 return LengthVal;
7325 }
7326 return CGF.getTypeSize(ExprTy);
7327 }
7328
7329 /// Return the corresponding bits for a given map clause modifier. Add
7330 /// a flag marking the map as a pointer if requested. Add a flag marking the
7331 /// map as the first one of a series of maps that relate to the same map
7332 /// expression.
7333 OpenMPOffloadMappingFlags getMapTypeBits(
7334 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7335 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7336 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7337 OpenMPOffloadMappingFlags Bits =
7338 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7339 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7340 switch (MapType) {
7341 case OMPC_MAP_alloc:
7342 case OMPC_MAP_release:
7343 // alloc and release is the default behavior in the runtime library, i.e.
7344 // if we don't pass any bits alloc/release that is what the runtime is
7345 // going to do. Therefore, we don't need to signal anything for these two
7346 // type modifiers.
7347 break;
7348 case OMPC_MAP_to:
7349 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7350 break;
7351 case OMPC_MAP_from:
7352 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7353 break;
7354 case OMPC_MAP_tofrom:
7355 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7356 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7357 break;
7358 case OMPC_MAP_delete:
7359 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7360 break;
7361 case OMPC_MAP_unknown:
7362 llvm_unreachable("Unexpected map type!");
7363 }
7364 if (AddPtrFlag)
7365 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7366 if (AddIsTargetParamFlag)
7367 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7368 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7369 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7370 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7371 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7372 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7373 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7374 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7375 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7376 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7377 if (IsNonContiguous)
7378 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7379 return Bits;
7380 }
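// Illustrative example: 'map(always, tofrom : x)' yields
//   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS (0x1 | 0x2 | 0x4 = 0x7),
// and an implicit map additionally carries OMP_MAP_IMPLICIT; alloc/release
// deliberately contribute no bits, since that is the runtime's default
// behavior.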
7381
7382 /// Return true if the provided expression is a final array section. A
7383 /// final array section is one whose length can't be proved to be one.
7384 bool isFinalArraySectionExpression(const Expr *E) const {
7385 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7386
7387 // It is not an array section and therefore not a unity-size one.
7388 if (!OASE)
7389 return false;
7390
7391 // An array section with no colon always refers to a single element.
7392 if (OASE->getColonLocFirst().isInvalid())
7393 return false;
7394
7395 const Expr *Length = OASE->getLength();
7396
7397 // If we don't have a length we have to check if the array has size 1
7398 // for this dimension. Also, we should always expect a length if the
7399 // base type is a pointer.
7400 if (!Length) {
7401 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7402 OASE->getBase()->IgnoreParenImpCasts())
7403 .getCanonicalType();
7404 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7405 return ATy->getSExtSize() != 1;
7406 // If we don't have a constant dimension length, we have to consider
7407 // the current section as having any size, so it is not necessarily
7408 // unitary. If it happens to be unity size, that's the user's fault.
7409 return true;
7410 }
7411
7412 // Check if the length evaluates to 1.
7413 Expr::EvalResult Result;
7414 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7415 return true; // Can have a size greater than 1.
7416
7417 llvm::APSInt ConstLength = Result.Val.getInt();
7418 return ConstLength.getSExtValue() != 1;
7419 }
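// For example, 'a[3]' (no colon) and 'a[i:1]' (length provably 1) are not
// final array sections, while 'a[0:n]' is treated as final because its
// length cannot be proven to be one at compile time.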
7420
7421 /// A helper class to copy structures with overlapped elements, i.e. those
7422 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7423 /// are not explicitly copied have mapping nodes synthesized for them,
7424 /// taking care to avoid generating zero-sized copies.
7425 class CopyOverlappedEntryGaps {
7426 CodeGenFunction &CGF;
7427 MapCombinedInfoTy &CombinedInfo;
7428 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7429 const ValueDecl *MapDecl = nullptr;
7430 const Expr *MapExpr = nullptr;
7431 Address BP = Address::invalid();
7432 bool IsNonContiguous = false;
7433 uint64_t DimSize = 0;
7434 // These elements track the position as the struct is iterated over
7435 // (in order of increasing element address).
7436 const RecordDecl *LastParent = nullptr;
7437 uint64_t Cursor = 0;
7438 unsigned LastIndex = -1u;
7439 Address LB = Address::invalid();
7440
7441 public:
7442 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7443 MapCombinedInfoTy &CombinedInfo,
7444 OpenMPOffloadMappingFlags Flags,
7445 const ValueDecl *MapDecl, const Expr *MapExpr,
7446 Address BP, Address LB, bool IsNonContiguous,
7447 uint64_t DimSize)
7448 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7449 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7450 DimSize(DimSize), LB(LB) {}
7451
7452 void processField(
7453 const OMPClauseMappableExprCommon::MappableComponent &MC,
7454 const FieldDecl *FD,
7455 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7456 EmitMemberExprBase) {
7457 const RecordDecl *RD = FD->getParent();
7458 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7459 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7460 uint64_t FieldSize =
7461 CGF.getContext().getTypeSize(FD->getType());
7462 Address ComponentLB = Address::invalid();
7463
7464 if (FD->getType()->isLValueReferenceType()) {
7465 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7466 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7467 ComponentLB =
7468 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7469 } else {
7470 ComponentLB =
7471 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
7472 }
7473
7474 if (!LastParent)
7475 LastParent = RD;
7476 if (FD->getParent() == LastParent) {
7477 if (FD->getFieldIndex() != LastIndex + 1)
7478 copyUntilField(FD, ComponentLB);
7479 } else {
7480 LastParent = FD->getParent();
7481 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7482 copyUntilField(FD, ComponentLB);
7483 }
7484 Cursor = FieldOffset + FieldSize;
7485 LastIndex = FD->getFieldIndex();
7486 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7487 }
7488
7489 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7490 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7491 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7492 llvm::Value *Size =
7493 CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
7494 copySizedChunk(LBPtr, Size);
7495 }
7496
7497 void copyUntilEnd(Address HB) {
7498 if (LastParent) {
7499 const ASTRecordLayout &RL =
7500 CGF.getContext().getASTRecordLayout(LastParent);
7501 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7502 return;
7503 }
7504 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7505 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7506 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7507 LBPtr);
7508 copySizedChunk(LBPtr, Size);
7509 }
7510
7511 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7512 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7513 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7514 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7515 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7516 CombinedInfo.Pointers.push_back(Base);
7517 CombinedInfo.Sizes.push_back(
7518 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7519 CombinedInfo.Types.push_back(Flags);
7520 CombinedInfo.Mappers.push_back(nullptr);
7521 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7522 }
7523 };
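// For illustration (a sketch): given
//   struct T { int a; int b; int c; } t;
//   #pragma omp target map(tofrom: t) map(from: t.b)
// 't.b' overlaps the map of 't', so this helper synthesizes copy entries
// for the gaps around it ('t.a' and 't.c') instead of copying the whole
// struct over the separately mapped member.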
7524
7525 /// Generate the base pointers, section pointers, sizes, map type bits, and
7526 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7527 /// map type, map or motion modifiers, and expression components.
7528 /// \a IsFirstComponent should be set to true if the provided set of
7529 /// components is the first associated with a capture.
7530 void generateInfoForComponentList(
7531 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7532 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7533 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7534 MapCombinedInfoTy &CombinedInfo,
7535 MapCombinedInfoTy &StructBaseCombinedInfo,
7536 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7537 bool IsImplicit, bool GenerateAllInfoForClauses,
7538 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7539 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7540 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7541 OverlappedElements = {},
7542 bool AreBothBasePtrAndPteeMapped = false) const {
7543 // The following summarizes what has to be generated for each map and the
7544 // types below. The generated information is expressed in this order:
7545 // base pointer, section pointer, size, flags
7546 // (to add to the ones that come from the map type and modifier).
7547 //
7548 // double d;
7549 // int i[100];
7550 // float *p;
7551 // int **a = &i;
7552 //
7553 // struct S1 {
7554 // int i;
7555 // float f[50];
7556 // }
7557 // struct S2 {
7558 // int i;
7559 // float f[50];
7560 // S1 s;
7561 // double *p;
7562 // struct S2 *ps;
7563 // int &ref;
7564 // }
7565 // S2 s;
7566 // S2 *ps;
7567 //
7568 // map(d)
7569 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7570 //
7571 // map(i)
7572 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7573 //
7574 // map(i[1:23])
7575 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7576 //
7577 // map(p)
7578 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7579 //
7580 // map(p[1:24])
7581 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7582 // in unified shared memory mode or for local pointers
7583 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7584 //
7585 // map((*a)[0:3])
7586 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7587 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7588 //
7589 // map(**a)
7590 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7591 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7592 //
7593 // map(s)
7594 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7595 //
7596 // map(s.i)
7597 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7598 //
7599 // map(s.s.f)
7600 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7601 //
7602 // map(s.p)
7603 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7604 //
7605 // map(to: s.p[:22])
7606 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7607 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7608 // &(s.p), &(s.p[0]), 22*sizeof(double),
7609 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7610 // (*) alloc space for struct members, only this is a target parameter
7611 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7612 // optimizes this entry out, same in the examples below)
7613 // (***) map the pointee (map: to)
7614 //
7615 // map(to: s.ref)
7616 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7617 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7618 // (*) alloc space for struct members, only this is a target parameter
7619 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7620 // optimizes this entry out, same in the examples below)
7621 // (***) map the pointee (map: to)
7622 //
7623 // map(s.ps)
7624 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7625 //
7626 // map(from: s.ps->s.i)
7627 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7628 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7629 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7630 //
7631 // map(to: s.ps->ps)
7632 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7633 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7634 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7635 //
7636 // map(s.ps->ps->ps)
7637 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7638 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7639 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7640 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7641 //
7642 // map(to: s.ps->ps->s.f[:22])
7643 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7644 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7645 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7646 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7647 //
7648 // map(ps)
7649 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7650 //
7651 // map(ps->i)
7652 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7653 //
7654 // map(ps->s.f)
7655 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7656 //
7657 // map(from: ps->p)
7658 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7659 //
7660 // map(to: ps->p[:22])
7661 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7662 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7663 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7664 //
7665 // map(ps->ps)
7666 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7667 //
7668 // map(from: ps->ps->s.i)
7669 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7670 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7671 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7672 //
7673 // map(from: ps->ps->ps)
7674 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7675 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7676 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7677 //
7678 // map(ps->ps->ps->ps)
7679 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7680 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7681 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7682 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7683 //
7684 // map(to: ps->ps->ps->s.f[:22])
7685 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7686 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7687 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7688 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7689 //
7690 // map(to: s.f[:22]) map(from: s.p[:33])
7691 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7692 // sizeof(double*) (**), TARGET_PARAM
7693 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7694 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7695 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7696 // (**) allocate contiguous space needed to fit all mapped members even if
7697 // we allocate space for members not mapped (in this example,
7698 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7699 // them as well because they fall between &s.f[0] and &s.p)
7700 //
7701 // map(from: s.f[:22]) map(to: ps->p[:33])
7702 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7703 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7704 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7705 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7706 // (*) the struct this entry pertains to is the 2nd element in the list of
7707 // arguments, hence MEMBER_OF(2)
7708 //
7709 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7710 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7711 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7712 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7713 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7714 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7715 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7716 // (*) the struct this entry pertains to is the 4th element in the list
7717 // of arguments, hence MEMBER_OF(4)
7718 //
7719 // map(p, p[:100])
7720 // ===> map(p[:100])
7721 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7722
7723 // Track if the map information being generated is the first for a capture.
7724 bool IsCaptureFirstInfo = IsFirstComponentList;
7725 // When the variable is on a declare target link or in a to clause with
7726 // unified memory, a reference is needed to hold the host/device address
7727 // of the variable.
7728 bool RequiresReference = false;
7729
7730 // Scan the components from the base to the complete expression.
7731 auto CI = Components.rbegin();
7732 auto CE = Components.rend();
7733 auto I = CI;
7734
7735 // Track if the map information being generated is the first for a list of
7736 // components.
7737 bool IsExpressionFirstInfo = true;
7738 bool FirstPointerInComplexData = false;
7739 Address BP = Address::invalid();
7740 const Expr *AssocExpr = I->getAssociatedExpression();
7741 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7742 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7743 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7744
7745 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7746 return;
7747 if (isa<MemberExpr>(AssocExpr)) {
7748 // The base is the 'this' pointer. The content of the pointer is going
7749 // to be the base of the field being mapped.
7750 BP = CGF.LoadCXXThisAddress();
7751 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7752 (OASE &&
7753 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7754 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7755 } else if (OAShE &&
7756 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7757 BP = Address(
7758 CGF.EmitScalarExpr(OAShE->getBase()),
7759 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7760 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7761 } else {
7762 // The base is the reference to the variable.
7763 // BP = &Var.
7764 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7765 if (const auto *VD =
7766 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7767 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7768 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7769 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7770 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7771 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7772 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7773 RequiresReference = true;
7774 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7775 }
7776 }
7777 }
7778
7779 // If the variable is a pointer and is being dereferenced (i.e. is not
7780 // the last component), the base has to be the pointer itself, not its
7781 // reference. References are ignored for mapping purposes.
7782 QualType Ty =
7783 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7784 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7785 // No need to generate individual map information for the pointer, it
7786 // can be associated with the combined storage if shared memory mode is
7787 // active or the base declaration is not a global variable.
7788 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7789 if (!AreBothBasePtrAndPteeMapped &&
7790 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7791 !VD || VD->hasLocalStorage()))
7792 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7793 else
7794 FirstPointerInComplexData = true;
7795 ++I;
7796 }
7797 }
7798
7799 // Track whether a component of the list should be marked as MEMBER_OF some
7800 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7801 // in a component list should be marked as MEMBER_OF, all subsequent entries
7802 // do not belong to the base struct. E.g.
7803 // struct S2 s;
7804 // s.ps->ps->ps->f[:]
7805 // (1) (2) (3) (4)
7806 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7807 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7808 // is the pointee of ps(2) which is not member of struct s, so it should not
7809 // be marked as such (it is still PTR_AND_OBJ).
7810 // The variable is initialized to false so that PTR_AND_OBJ entries which
7811 // are not struct members are not considered (e.g. array of pointers to
7812 // data).
7813 bool ShouldBeMemberOf = false;
7814
7815 // Variable keeping track of whether or not we have encountered a component
7816 // in the component list which is a member expression. Useful when we have a
7817 // pointer or a final array section, in which case it is the previous
7818 // component in the list which tells us whether we have a member expression.
7819 // E.g. X.f[:]
7820 // While processing the final array section "[:]" it is "f" which tells us
7821 // whether we are dealing with a member of a declared struct.
7822 const MemberExpr *EncounteredME = nullptr;
7823
7824 // Track the total number of dimensions. Start from one for the dummy
7825 // dimension.
7826 uint64_t DimSize = 1;
7827
7828 // Detects non-contiguous updates due to strided accesses.
7829 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
7830 // correctly when generating information to be passed to the runtime. The
7831 // flag is set to true if any array section has a stride not equal to 1, or
7832 // if the stride is not a constant expression (conservatively assumed
7833 // non-contiguous).
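// E.g. 'target update to(a[0:n:2])' (stride 2) is treated as
// non-contiguous, while 'a[0:n]' (implicit stride 1) is not.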
7834 bool IsNonContiguous =
7835 CombinedInfo.NonContigInfo.IsNonContiguous ||
7836 any_of(Components, [&](const auto &Component) {
7837 const auto *OASE =
7838 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
7839 if (!OASE)
7840 return false;
7841
7842 const Expr *StrideExpr = OASE->getStride();
7843 if (!StrideExpr)
7844 return false;
7845
7846 const auto Constant =
7847 StrideExpr->getIntegerConstantExpr(CGF.getContext());
7848 if (!Constant)
7849 return false;
7850
7851 return !Constant->isOne();
7852 });
7853
7854 bool IsPrevMemberReference = false;
7855
7856 bool IsPartialMapped =
7857 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7858
7859 // We need to check if we will be encountering any MEs. If we do not
7860 // encounter any ME expression it means we will be mapping the whole struct.
7861 // In that case we need to skip adding an entry for the struct to the
7862 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7863 // list only when generating all info for clauses.
7864 bool IsMappingWholeStruct = true;
7865 if (!GenerateAllInfoForClauses) {
7866 IsMappingWholeStruct = false;
7867 } else {
7868 for (auto TempI = I; TempI != CE; ++TempI) {
7869 const MemberExpr *PossibleME =
7870 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7871 if (PossibleME) {
7872 IsMappingWholeStruct = false;
7873 break;
7874 }
7875 }
7876 }
7877
7878 for (; I != CE; ++I) {
7879 // If the current component is member of a struct (parent struct) mark it.
7880 if (!EncounteredME) {
7881 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7882 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7883 // as MEMBER_OF the parent struct.
7884 if (EncounteredME) {
7885 ShouldBeMemberOf = true;
7886 // Do not emit as complex pointer if this is actually not an
7887 // array-like expression.
7888 if (FirstPointerInComplexData) {
7889 QualType Ty = std::prev(I)
7890 ->getAssociatedDeclaration()
7891 ->getType()
7892 .getNonReferenceType();
7893 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7894 FirstPointerInComplexData = false;
7895 }
7896 }
7897 }
7898
7899 auto Next = std::next(I);
7900
7901 // We need to generate the addresses and sizes if this is the last
7902 // component, if the component is a pointer or if it is an array section
7903 // whose length can't be proved to be one. If this is a pointer, it
7904 // becomes the base address for the following components.
7905
7906 // A final array section is one whose length can't be proved to be one.
7907 // If the map item is non-contiguous then we don't treat any array section
7908 // as final array section.
7909 bool IsFinalArraySection =
7910 !IsNonContiguous &&
7911 isFinalArraySectionExpression(I->getAssociatedExpression());
7912
7913 // If we have a declaration for the mapping, use that; otherwise use
7914 // the base declaration of the map clause.
7915 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7916 ? I->getAssociatedDeclaration()
7917 : BaseDecl;
7918 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7919 : MapExpr;
7920
7921 // Get information on whether the element is a pointer. Have to do a
7922 // special treatment for array sections given that they are built-in
7923 // types.
7924 const auto *OASE =
7925 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7926 const auto *OAShE =
7927 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7928 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7929 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7930 bool IsPointer =
7931 OAShE ||
7932 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7933 .getCanonicalType()
7934 ->isAnyPointerType()) ||
7935 I->getAssociatedExpression()->getType()->isAnyPointerType();
7936 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7937 MapDecl &&
7938 MapDecl->getType()->isLValueReferenceType();
7939 bool IsNonDerefPointer = IsPointer &&
7940 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7941 !IsNonContiguous;
7942
7943 if (OASE)
7944 ++DimSize;
7945
7946 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7947 IsFinalArraySection) {
7948 // If this is not the last component, we expect the pointer to be
7949 // associated with an array expression or member expression.
7950 assert((Next == CE ||
7951 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7952 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7953 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7954 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7955 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7956 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7957 "Unexpected expression");
7958
7959 Address LB = Address::invalid();
7960 Address LowestElem = Address::invalid();
7961 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7962 const MemberExpr *E) {
7963 const Expr *BaseExpr = E->getBase();
7964 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7965 // scalar.
7966 LValue BaseLV;
7967 if (E->isArrow()) {
7968 LValueBaseInfo BaseInfo;
7969 TBAAAccessInfo TBAAInfo;
7970 Address Addr =
7971 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7972 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7973 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7974 } else {
7975 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7976 }
7977 return BaseLV;
7978 };
7979 if (OAShE) {
7980 LowestElem = LB =
7981 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7982 CGF.ConvertTypeForMem(
7983 OAShE->getBase()->getType()->getPointeeType()),
7984 CGF.getContext().getTypeAlignInChars(
7985 OAShE->getBase()->getType()));
7986 } else if (IsMemberReference) {
7987 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7988 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7989 LowestElem = CGF.EmitLValueForFieldInitialization(
7990 BaseLVal, cast<FieldDecl>(MapDecl))
7991 .getAddress();
7992 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7993 .getAddress();
7994 } else {
7995 LowestElem = LB =
7996 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7997 .getAddress();
7998 }
7999
8000 // If this component is a pointer inside the base struct then we don't
8001 // need to create any entry for it - it will be combined with the object
8002 // it is pointing to into a single PTR_AND_OBJ entry.
8003 bool IsMemberPointerOrAddr =
8004 EncounteredME &&
8005 (((IsPointer || ForDeviceAddr) &&
8006 I->getAssociatedExpression() == EncounteredME) ||
8007 (IsPrevMemberReference && !IsPointer) ||
8008 (IsMemberReference && Next != CE &&
8009 !Next->getAssociatedExpression()->getType()->isPointerType()));
8010 if (!OverlappedElements.empty() && Next == CE) {
8011 // Handle base element with the info for overlapped elements.
8012 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8013 assert(!IsPointer &&
8014 "Unexpected base element with the pointer type.");
8015 // Mark the whole struct as the struct that requires allocation on the
8016 // device.
8017 PartialStruct.LowestElem = {0, LowestElem};
8018 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8019 I->getAssociatedExpression()->getType());
8020 Address HB = CGF.Builder.CreateConstGEP(
8021 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8022 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8023 TypeSize.getQuantity() - 1);
8024 PartialStruct.HighestElem = {
8025 std::numeric_limits<decltype(
8026 PartialStruct.HighestElem.first)>::max(),
8027 HB};
8028 PartialStruct.Base = BP;
8029 PartialStruct.LB = LB;
8030 assert(
8031 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8032 "Overlapped elements must be used only once for the variable.");
8033 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8034 // Emit data for non-overlapped data.
8035 OpenMPOffloadMappingFlags Flags =
8036 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8037 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8038 /*AddPtrFlag=*/false,
8039 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8040 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8041 MapExpr, BP, LB, IsNonContiguous,
8042 DimSize);
8043 // Do bitcopy of all non-overlapped structure elements.
8044 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8045 Component : OverlappedElements) {
8046 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8047 Component) {
8048 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8049 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
8050 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8051 }
8052 }
8053 }
8054 }
8055 CopyGaps.copyUntilEnd(HB);
8056 break;
8057 }
8058 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8059 // Skip adding an entry in the CurInfo of this combined entry if the
8060 // whole struct is currently being mapped. The struct needs to be added
8061 // in the first position before any data internal to the struct is being
8062 // mapped.
8063 // Skip adding an entry in the CurInfo of this combined entry if the
8064 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8065 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8066 (Next == CE && MapType != OMPC_MAP_unknown)) {
8067 if (!IsMappingWholeStruct) {
8068 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8069 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
8070 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8071 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8072 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8073 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8074 Size, CGF.Int64Ty, /*isSigned=*/true));
8075 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8076 : 1);
8077 } else {
8078 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8079 StructBaseCombinedInfo.BasePointers.push_back(
8080 BP.emitRawPointer(CGF));
8081 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
8082 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8083 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8084 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8085 Size, CGF.Int64Ty, /*isSigned=*/true));
8086 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8087 IsNonContiguous ? DimSize : 1);
8088 }
8089
8090 // If Mapper is valid, the last component inherits the mapper.
8091 bool HasMapper = Mapper && Next == CE;
8092 if (!IsMappingWholeStruct)
8093 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8094 else
8095 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
8096 : nullptr);
8097
8098 // We need to add a pointer flag for each map that comes from the
8099 // same expression except for the first one. We also need to signal
8100 // this map is the first one that relates with the current capture
8101 // (there is a set of entries for each capture).
8102 OpenMPOffloadMappingFlags Flags =
8103 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8104 !IsExpressionFirstInfo || RequiresReference ||
8105 FirstPointerInComplexData || IsMemberReference,
8106 AreBothBasePtrAndPteeMapped ||
8107 (IsCaptureFirstInfo && !RequiresReference),
8108 IsNonContiguous);
8109
8110 if (!IsExpressionFirstInfo || IsMemberReference) {
8111 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8112 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8113 if (IsPointer || (IsMemberReference && Next != CE))
8114 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8115 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8116 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8117 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8118 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8119
8120 if (ShouldBeMemberOf) {
8121 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8122 // should be later updated with the correct value of MEMBER_OF.
8123 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8124 // From now on, all subsequent PTR_AND_OBJ entries should not be
8125 // marked as MEMBER_OF.
8126 ShouldBeMemberOf = false;
8127 }
8128 }
8129
8130 if (!IsMappingWholeStruct)
8131 CombinedInfo.Types.push_back(Flags);
8132 else
8133 StructBaseCombinedInfo.Types.push_back(Flags);
8134 }
8135
8136 // If we have encountered a member expression so far, keep track of the
8137 // mapped member. If the parent is "*this", then the value declaration
8138 // is nullptr.
8139 if (EncounteredME) {
8140 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8141 unsigned FieldIndex = FD->getFieldIndex();
8142
8143 // Update info about the lowest and highest elements for this struct
8144 if (!PartialStruct.Base.isValid()) {
8145 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8146 if (IsFinalArraySection && OASE) {
8147 Address HB =
8148 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8149 .getAddress();
8150 PartialStruct.HighestElem = {FieldIndex, HB};
8151 } else {
8152 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8153 }
8154 PartialStruct.Base = BP;
8155 PartialStruct.LB = BP;
8156 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8157 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8158 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8159 if (IsFinalArraySection && OASE) {
8160 Address HB =
8161 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8162 .getAddress();
8163 PartialStruct.HighestElem = {FieldIndex, HB};
8164 } else {
8165 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8166 }
8167 }
8168 }
8169
8170 // Need to emit combined struct for array sections.
8171 if (IsFinalArraySection || IsNonContiguous)
8172 PartialStruct.IsArraySection = true;
8173
8174 // If we have a final array section, we are done with this expression.
8175 if (IsFinalArraySection)
8176 break;
8177
8178 // The pointer becomes the base for the next element.
8179 if (Next != CE)
8180 BP = IsMemberReference ? LowestElem : LB;
8181 if (!IsPartialMapped)
8182 IsExpressionFirstInfo = false;
8183 IsCaptureFirstInfo = false;
8184 FirstPointerInComplexData = false;
8185 IsPrevMemberReference = IsMemberReference;
8186 } else if (FirstPointerInComplexData) {
8187 QualType Ty = Components.rbegin()
8188 ->getAssociatedDeclaration()
8189 ->getType()
8190 .getNonReferenceType();
8191 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8192 FirstPointerInComplexData = false;
8193 }
8194 }
8195 // If we ran over the whole component list, allocate space for the
8196 // whole record.
8197 if (!EncounteredME)
8198 PartialStruct.HasCompleteRecord = true;
8199
8200 if (!IsNonContiguous)
8201 return;
8202
8203 const ASTContext &Context = CGF.getContext();
8204
8205 // For supporting stride in array section, we need to initialize the first
8206 // dimension size as 1, the first offset as 0, and the first count as 1.
8207 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8208 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8209 MapValuesArrayTy CurStrides;
8210 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8211 uint64_t ElementTypeSize;
8212
8213 // Collect Size information for each dimension and get the element size as
8214 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8215 // should be [10, 10] and the first stride is 4 bytes.
8216 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8217 Components) {
8218 const Expr *AssocExpr = Component.getAssociatedExpression();
8219 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8220
8221 if (!OASE)
8222 continue;
8223
8224 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
8225 auto *CAT = Context.getAsConstantArrayType(Ty);
8226 auto *VAT = Context.getAsVariableArrayType(Ty);
8227
8228 // We need all the dimension sizes except for the last dimension.
8229 assert((VAT || CAT || &Component == &*Components.begin()) &&
8230 "Should be either ConstantArray or VariableArray if not the "
8231 "first Component");
8232
8233 // Get element size if CurStrides is empty.
8234 if (CurStrides.empty()) {
8235 const Type *ElementType = nullptr;
8236 if (CAT)
8237 ElementType = CAT->getElementType().getTypePtr();
8238 else if (VAT)
8239 ElementType = VAT->getElementType().getTypePtr();
8240 else
8241 assert(&Component == &*Components.begin() &&
8242 "Only expect pointer (non CAT or VAT) when this is the "
8243 "first Component");
8244 // If ElementType is null, then it means the base is a pointer
8245 // (neither CAT nor VAT) and we'll attempt to get ElementType again
8246 // for next iteration.
8247 if (ElementType) {
8248 // For the case that having pointer as base, we need to remove one
8249 // level of indirection.
8250 if (&Component != &*Components.begin())
8251 ElementType = ElementType->getPointeeOrArrayElementType();
8252 ElementTypeSize =
8253 Context.getTypeSizeInChars(ElementType).getQuantity();
8254 CurStrides.push_back(
8255 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8256 }
8257 }
8258 // Get dimension value except for the last dimension since we don't need
8259 // it.
8260 if (DimSizes.size() < Components.size() - 1) {
8261 if (CAT)
8262 DimSizes.push_back(
8263 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
8264 else if (VAT)
8265 DimSizes.push_back(CGF.Builder.CreateIntCast(
8266 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8267 /*IsSigned=*/false));
8268 }
8269 }
8270
8271 // Skip the dummy dimension since we already have its information.
8272 auto *DI = DimSizes.begin() + 1;
8273 // Product of dimensions.
8274 llvm::Value *DimProd =
8275 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8276
8277 // Collect info for non-contiguous. Notice that offset, count, and stride
8278 // are only meaningful for array-section, so we insert a null for anything
8279 // other than array-section.
8280 // Also, the sizes of offset, count, and stride are not the same as those
8281 // of pointers, base_pointers, sizes, or dims. Instead, they match the
8282 // number of non-contiguous declarations in the target update to/from
8283 // clause.
8284 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8285 Components) {
8286 const Expr *AssocExpr = Component.getAssociatedExpression();
8287
8288 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8289 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8290 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8291 /*isSigned=*/false);
8292 CurOffsets.push_back(Offset);
8293 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8294 CurStrides.push_back(CurStrides.back());
8295 continue;
8296 }
8297
8298 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8299
8300 if (!OASE)
8301 continue;
8302
8303 // Offset
8304 const Expr *OffsetExpr = OASE->getLowerBound();
8305 llvm::Value *Offset = nullptr;
8306 if (!OffsetExpr) {
8307 // If offset is absent, then we just set it to zero.
8308 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8309 } else {
8310 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8311 CGF.Int64Ty,
8312 /*isSigned=*/false);
8313 }
8314 CurOffsets.push_back(Offset);
8315
8316 // Count
8317 const Expr *CountExpr = OASE->getLength();
8318 llvm::Value *Count = nullptr;
8319 if (!CountExpr) {
8320 // In Clang, once a higher dimension is an array section, all the lower
8321 // dimensions are constructed as array sections too. However, for a case
8322 // like arr[0:2][2], Clang constructs the inner dimension as an array
8323 // section even though it is not in array-section form per the spec.
8324 if (!OASE->getColonLocFirst().isValid() &&
8325 !OASE->getColonLocSecond().isValid()) {
8326 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8327 } else {
8328 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8329 // When the length is absent it defaults to ⌈(size −
8330 // lower-bound)/stride⌉, where size is the size of the array
8331 // dimension.
8332 const Expr *StrideExpr = OASE->getStride();
8333 llvm::Value *Stride =
8334 StrideExpr
8335 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8336 CGF.Int64Ty, /*isSigned=*/false)
8337 : nullptr;
8338 if (Stride)
8339 Count = CGF.Builder.CreateUDiv(
8340 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8341 else
8342 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8343 }
8344 } else {
8345 Count = CGF.EmitScalarExpr(CountExpr);
8346 }
8347 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8348 CurCounts.push_back(Count);
8349
8350 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8351 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8352 // Offset Count Stride
8353 // D0 0 1 4 (int) <- dummy dimension
8354 // D1 0 2 8 (2 * (1) * 4)
8355 // D2 1 2 20 (1 * (1 * 5) * 4)
8356 // D3 0 2 200 (2 * (1 * 5 * 5) * 4)
8357 const Expr *StrideExpr = OASE->getStride();
8358 llvm::Value *Stride =
8359 StrideExpr
8360 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8361 CGF.Int64Ty, /*isSigned=*/false)
8362 : nullptr;
8363 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8364 if (Stride)
8365 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8366 else
8367 CurStrides.push_back(DimProd);
8368 if (DI != DimSizes.end())
8369 ++DI;
8370 }
8371
8372 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8373 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8374 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8375 }
8376
8377 /// Return the adjusted map modifiers if the declaration a capture refers to
8378 /// appears in a first-private clause. This is expected to be used only with
8379 /// directives that start with 'target'.
8380 OpenMPOffloadMappingFlags
8381 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8382 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8383
8384 // A first private variable captured by reference will use only the
8385 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8386 // declaration is known as first-private in this handler.
8387 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8388 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8389 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8390 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8391 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8392 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8393 }
8394 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8395 if (I != LambdasMap.end())
8396 // For map(to: lambda): use the user-specified map type.
8397 return getMapTypeBits(
8398 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8399 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8400 /*AddPtrFlag=*/false,
8401 /*AddIsTargetParamFlag=*/false,
8402 /*isNonContiguous=*/false);
8403 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8404 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8405 }
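// For illustration, the cases above reduce to (schematic):
//   firstprivate 'float *p' (pointer type)  -> TO | PTR_AND_OBJ
//   firstprivate 'int x' captured by ref    -> PRIVATE | TO
//   lambda mapped via map(to: ...)          -> bits of the user's map type
//   any other capture                       -> TO | FROM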
8406
8407 void getPlainLayout(const CXXRecordDecl *RD,
8408 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8409 bool AsBase) const {
8410 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8411
8412 llvm::StructType *St =
8413 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8414
8415 unsigned NumElements = St->getNumElements();
8416 llvm::SmallVector<
8417 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8418 RecordLayout(NumElements);
8419
8420 // Fill bases.
8421 for (const auto &I : RD->bases()) {
8422 if (I.isVirtual())
8423 continue;
8424
8425 QualType BaseTy = I.getType();
8426 const auto *Base = BaseTy->getAsCXXRecordDecl();
8427 // Ignore empty bases.
8428 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8429 CGF.getContext()
8430 .getASTRecordLayout(Base)
8431 .getNonVirtualSize()
8432 .isZero())
8433 continue;
8434
8435 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8436 RecordLayout[FieldIndex] = Base;
8437 }
8438 // Fill in virtual bases.
8439 for (const auto &I : RD->vbases()) {
8440 QualType BaseTy = I.getType();
8441 // Ignore empty bases.
8442 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8443 continue;
8444
8445 const auto *Base = BaseTy->getAsCXXRecordDecl();
8446 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8447 if (RecordLayout[FieldIndex])
8448 continue;
8449 RecordLayout[FieldIndex] = Base;
8450 }
8451 // Fill in all the fields.
8452 assert(!RD->isUnion() && "Unexpected union.");
8453 for (const auto *Field : RD->fields()) {
8454 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8455 // will fill in later.)
8456 if (!Field->isBitField() &&
8457 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8458 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8459 RecordLayout[FieldIndex] = Field;
8460 }
8461 }
8462 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8463 &Data : RecordLayout) {
8464 if (Data.isNull())
8465 continue;
8466 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8467 getPlainLayout(Base, Layout, /*AsBase=*/true);
8468 else
8469 Layout.push_back(cast<const FieldDecl *>(Data));
8470 }
8471 }
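// For illustration (a sketch; empty bases and bit-fields are skipped):
//   struct A { int a; };
//   struct B : A { int b; };
// getPlainLayout(B, Layout, /*AsBase=*/false) recurses into the base A
// first and yields [A::a, B::b], i.e. the fields in increasing address
// order.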
8472
8473 /// Returns the address corresponding to \p PointerExpr.
8474 static Address getAttachPtrAddr(const Expr *PointerExpr,
8475 CodeGenFunction &CGF) {
8476 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8477 Address AttachPtrAddr = Address::invalid();
8478
8479 if (auto *DRE = dyn_cast<DeclRefExpr>(PointerExpr)) {
8480 // If the pointer is a variable, we can use its address directly.
8481 AttachPtrAddr = CGF.EmitLValue(DRE).getAddress();
8482 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(PointerExpr)) {
8483 AttachPtrAddr =
8484 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/true).getAddress();
8485 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(PointerExpr)) {
8486 AttachPtrAddr = CGF.EmitLValue(ASE).getAddress();
8487 } else if (auto *ME = dyn_cast<MemberExpr>(PointerExpr)) {
8488 AttachPtrAddr = CGF.EmitMemberExpr(ME).getAddress();
8489 } else if (auto *UO = dyn_cast<UnaryOperator>(PointerExpr)) {
8490 assert(UO->getOpcode() == UO_Deref &&
8491 "Unexpected unary-operator on attach-ptr-expr");
8492 AttachPtrAddr = CGF.EmitLValue(UO).getAddress();
8493 }
8494 assert(AttachPtrAddr.isValid() &&
8495 "Failed to get address for attach pointer expression");
8496 return AttachPtrAddr;
8497 }
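// For illustration, the attach-ptr expression forms handled above (the
// map clauses shown are schematic examples):
//   p     - DeclRefExpr,        e.g. from map(p[0:10])
//   s.p   - MemberExpr,         e.g. from map(s.p[0:10])
//   pp[0] - ArraySubscriptExpr, e.g. from map(pp[0][0:10])
//   *pp   - UnaryOperator,      e.g. from map((*pp)[0:10])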
8498
8499 /// Get the address of the attach pointer, and a load from it, to get the
8500 /// pointee base address.
8501 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8502 /// contains invalid addresses if \p AttachPtrExpr is null.
8503 static std::pair<Address, Address>
8504 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8505 CodeGenFunction &CGF) {
8506
8507 if (!AttachPtrExpr)
8508 return {Address::invalid(), Address::invalid()};
8509
8510 Address AttachPtrAddr = getAttachPtrAddr(AttachPtrExpr, CGF);
8511 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8512
8513 QualType AttachPtrType =
8514 AttachPtrExpr->getType().getNonReferenceType()
8515 .getCanonicalType();
8516 
8517 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8518 AttachPtrAddr, AttachPtrType->castAs<PointerType>());
8519 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8520
8521 return {AttachPtrAddr, AttachPteeBaseAddr};
8522 }
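// E.g. for an attach pointer 'int *p', the returned pair is (&p, p): the
// address of the pointer itself, and the loaded pointer value that serves
// as the base address of the pointee.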
8523
8524 /// Returns whether an attach entry should be emitted for a map on
8525 /// \p MapBaseDecl on the directive \p CurDir.
8526 static bool
8527 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8528 CodeGenFunction &CGF,
8529 llvm::PointerUnion<const OMPExecutableDirective *,
8530 const OMPDeclareMapperDecl *>
8531 CurDir) {
8532 if (!PointerExpr)
8533 return false;
8534
8535 // Pointer attachment is needed at map-entering time or for declare
8536 // mappers.
8537 return isa<const OMPDeclareMapperDecl *>(CurDir) ||
8538 isOpenMPTargetMapEnteringDirective(
8539 cast<const OMPExecutableDirective *>(CurDir)
8540 ->getDirectiveKind());
8541 }
8542
8543 /// Computes the attach-ptr expr for \p Components, and updates various maps
8544 /// with the information.
8545 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8546 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8547 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8548 /// AttachPtrExprMap.
8549 void collectAttachPtrExprInfo(
8550 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
8551 llvm::PointerUnion<const OMPExecutableDirective *,
8552 const OMPDeclareMapperDecl *>
8553 CurDir) {
8554
8555 OpenMPDirectiveKind CurDirectiveID =
8556 isa<const OMPDeclareMapperDecl *>(CurDir)
8557 ? OMPD_declare_mapper
8558 : cast<const OMPExecutableDirective *>(CurDir)->getDirectiveKind();
8559
8560 const auto &[AttachPtrExpr, Depth] =
8561 OMPClauseMappableExprCommon::findAttachPtrExpr(Components,
8562 CurDirectiveID);
8563
8564 AttachPtrComputationOrderMap.try_emplace(
8565 AttachPtrExpr, AttachPtrComputationOrderMap.size());
8566 AttachPtrComponentDepthMap.try_emplace(AttachPtrExpr, Depth);
8567 AttachPtrExprMap.try_emplace(Components, AttachPtrExpr);
8568 }
8569
8570 /// Generate all the base pointers, section pointers, sizes, map types, and
8571 /// mappers for the extracted mappable expressions (all included in \a
8572 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8573 /// pair of the relevant declaration and index where it occurs is appended to
8574 /// the device pointers info array.
8575 void generateAllInfoForClauses(
8576 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8577 llvm::OpenMPIRBuilder &OMPBuilder,
8578 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8579 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8580 // We have to process the component lists that relate with the same
8581 // declaration in a single chunk so that we can generate the map flags
8582 // correctly. Therefore, we organize all lists in a map.
8583 enum MapKind { Present, Allocs, Other, Total };
8584 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8585 SmallVector<SmallVector<MapInfo, 8>, 4>>
8586 Info;
8587
8588 // Helper function to fill the information map for the different supported
8589 // clauses.
8590 auto &&InfoGen =
8591 [&Info, &SkipVarSet](
8592 const ValueDecl *D, MapKind Kind,
8593 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8594 OpenMPMapClauseKind MapType,
8595 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8596 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8597 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8598 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8599 if (SkipVarSet.contains(D))
8600 return;
8601 auto It = Info.try_emplace(D, Total).first;
8602 It->second[Kind].emplace_back(
8603 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8604 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8605 };
8606
8607 for (const auto *Cl : Clauses) {
8608 const auto *C = dyn_cast<OMPMapClause>(Cl);
8609 if (!C)
8610 continue;
8611 MapKind Kind = Other;
8612 if (llvm::is_contained(C->getMapTypeModifiers(),
8613 OMPC_MAP_MODIFIER_present))
8614 Kind = Present;
8615 else if (C->getMapType() == OMPC_MAP_alloc)
8616 Kind = Allocs;
8617 const auto *EI = C->getVarRefs().begin();
8618 for (const auto L : C->component_lists()) {
8619 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8620 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8621 C->getMapTypeModifiers(), {},
8622 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8623 E);
8624 ++EI;
8625 }
8626 }
8627 for (const auto *Cl : Clauses) {
8628 const auto *C = dyn_cast<OMPToClause>(Cl);
8629 if (!C)
8630 continue;
8631 MapKind Kind = Other;
8632 if (llvm::is_contained(C->getMotionModifiers(),
8633 OMPC_MOTION_MODIFIER_present))
8634 Kind = Present;
8635 const auto *EI = C->getVarRefs().begin();
8636 for (const auto L : C->component_lists()) {
8637 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8638 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8639 C->isImplicit(), std::get<2>(L), *EI);
8640 ++EI;
8641 }
8642 }
8643 for (const auto *Cl : Clauses) {
8644 const auto *C = dyn_cast<OMPFromClause>(Cl);
8645 if (!C)
8646 continue;
8647 MapKind Kind = Other;
8648 if (llvm::is_contained(C->getMotionModifiers(),
8649 OMPC_MOTION_MODIFIER_present))
8650 Kind = Present;
8651 const auto *EI = C->getVarRefs().begin();
8652 for (const auto L : C->component_lists()) {
8653 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8654 C->getMotionModifiers(),
8655 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8656 *EI);
8657 ++EI;
8658 }
8659 }
8660
8661 // Look at the use_device_ptr and use_device_addr clause information and
8662 // mark the existing map entries as such. If there is no map information for
8663 // an entry in the use_device_ptr and use_device_addr list, we create one
8664 // with map type 'alloc' and zero size section. It is the user's fault if that
8665 // was not mapped before. If there is no map information and the pointer is
8666 // a struct member, then we defer the emission of that entry until the whole
8667 // struct has been processed.
8668 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8669 SmallVector<DeferredDevicePtrEntryTy, 4>>
8670 DeferredInfo;
8671 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8672
8673 auto &&UseDeviceDataCombinedInfoGen =
8674 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8675 CodeGenFunction &CGF, bool IsDevAddr) {
8676 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8677 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8678 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8679 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8680 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8681 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8682 UseDeviceDataCombinedInfo.Sizes.push_back(
8683 llvm::Constant::getNullValue(CGF.Int64Ty));
8684 UseDeviceDataCombinedInfo.Types.push_back(
8685 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8686 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8687 };
8688
8689 auto &&MapInfoGen =
8690 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8691 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8692 OMPClauseMappableExprCommon::MappableExprComponentListRef
8693 Components,
8694 bool IsImplicit, bool IsDevAddr) {
8695 // We didn't find any match in our map information - generate a zero
8696 // size array section - if the pointer is a struct member we defer
8697 // this action until the whole struct has been processed.
8698 if (isa<MemberExpr>(IE)) {
8699 // Insert the pointer into Info to be processed by
8700 // generateInfoForComponentList. Because it is a member pointer
8701 // without a pointee, no entry will be generated for it, therefore
8702 // we need to generate one after the whole struct has been
8703 // processed. Nonetheless, generateInfoForComponentList must be
8704 // called to take the pointer into account for the calculation of
8705 // the range of the partial struct.
8706 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
8707 /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
8708 IsDevAddr);
8709 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8710 } else {
8711 llvm::Value *Ptr;
8712 if (IsDevAddr) {
8713 if (IE->isGLValue())
8714 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8715 else
8716 Ptr = CGF.EmitScalarExpr(IE);
8717 } else {
8718 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8719 }
8720 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
8721 }
8722 };
8723
8724 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8725 const Expr *IE, bool IsDevAddr) -> bool {
8726 // We potentially have map information for this declaration already.
8727 // Look for the first set of components that refer to it. If found,
8728 // return true.
8729 // If the first component is a member expression, we have to look into
8730 // 'this', which maps to null in the map of map information. Otherwise
8731 // look directly for the information.
8732 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8733 if (It != Info.end()) {
8734 bool Found = false;
8735 for (auto &Data : It->second) {
8736 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8737 return MI.Components.back().getAssociatedDeclaration() == VD;
8738 });
8739 // If we found a map entry, signal that the pointer has to be
8740 // returned and move on to the next declaration. Exclude cases where
8741 // the base pointer is mapped as array subscript, array section or
8742 // array shaping. The base address is passed as a pointer to base in
8743 // this case and cannot be used as a base for use_device_ptr list
8744 // item.
8745 if (CI != Data.end()) {
8746 if (IsDevAddr) {
8747 CI->ForDeviceAddr = IsDevAddr;
8748 CI->ReturnDevicePointer = true;
8749 Found = true;
8750 break;
8751 } else {
8752 auto PrevCI = std::next(CI->Components.rbegin());
8753 const auto *VarD = dyn_cast<VarDecl>(VD);
8754 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8755 isa<MemberExpr>(IE) ||
8756 !VD->getType().getNonReferenceType()->isPointerType() ||
8757 PrevCI == CI->Components.rend() ||
8758 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8759 VarD->hasLocalStorage()) {
8760 CI->ForDeviceAddr = IsDevAddr;
8761 CI->ReturnDevicePointer = true;
8762 Found = true;
8763 break;
8764 }
8765 }
8766 }
8767 }
8768 return Found;
8769 }
8770 return false;
8771 };
8772
8773 // Look at the use_device_ptr clause information and mark the existing map
8774 // entries as such. If there is no map information for an entry in the
8775 // use_device_ptr list, we create one with map type 'alloc' and zero size
8776 // section. It is the user's fault if that was not mapped before. If there is
8777 // no map information and the pointer is a struct member, then we defer the
8778 // emission of that entry until the whole struct has been processed.
8779 for (const auto *Cl : Clauses) {
8780 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8781 if (!C)
8782 continue;
8783 for (const auto L : C->component_lists()) {
8784 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8785 std::get<1>(L);
8786 assert(!Components.empty() &&
8787 "Not expecting empty list of components!");
8788 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8789 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8790 const Expr *IE = Components.back().getAssociatedExpression();
8791 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8792 continue;
8793 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8794 /*IsDevAddr=*/false);
8795 }
8796 }
8797
8798 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8799 for (const auto *Cl : Clauses) {
8800 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8801 if (!C)
8802 continue;
8803 for (const auto L : C->component_lists()) {
8804 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8805 std::get<1>(L);
8806 assert(!std::get<1>(L).empty() &&
8807 "Not expecting empty list of components!");
8808 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8809 if (!Processed.insert(VD).second)
8810 continue;
8811 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8812 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8813 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8814 continue;
8815 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8816 /*IsDevAddr=*/true);
8817 }
8818 }
8819
8820 for (const auto &Data : Info) {
8821 StructRangeInfoTy PartialStruct;
8822 // Current struct information:
8823 MapCombinedInfoTy CurInfo;
8824 // Current struct base information:
8825 MapCombinedInfoTy StructBaseCurInfo;
8826 const Decl *D = Data.first;
8827 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8828 bool HasMapBasePtr = false;
8829 bool HasMapArraySec = false;
8830 if (VD && VD->getType()->isAnyPointerType()) {
8831 for (const auto &M : Data.second) {
8832 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8833 return isa_and_present<DeclRefExpr>(L.VarRef);
8834 });
8835 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8836 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8837 L.VarRef);
8838 });
8839 if (HasMapBasePtr && HasMapArraySec)
8840 break;
8841 }
8842 }
8843 for (const auto &M : Data.second) {
8844 for (const MapInfo &L : M) {
8845 assert(!L.Components.empty() &&
8846 "Not expecting declaration with no component lists.");
8847
8848 // Remember the current base pointer index.
8849 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8850 unsigned StructBasePointersIdx =
8851 StructBaseCurInfo.BasePointers.size();
8852 CurInfo.NonContigInfo.IsNonContiguous =
8853 L.Components.back().isNonContiguous();
8854 generateInfoForComponentList(
8855 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8856 CurInfo, StructBaseCurInfo, PartialStruct,
8857 /*IsFirstComponentList=*/false, L.IsImplicit,
8858 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8859 L.VarRef, /*OverlappedElements*/ {},
8860 HasMapBasePtr && HasMapArraySec);
8861
8862 // If this entry relates to a device pointer, set the relevant
8863 // declaration and add the 'return pointer' flag.
8864 if (L.ReturnDevicePointer) {
8865 // Check whether a value was added to either CurInfo or
8866 // StructBaseCurInfo and error if no value was added to either of
8867 // them:
8868 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8869 StructBasePointersIdx <
8870 StructBaseCurInfo.BasePointers.size()) &&
8871 "Unexpected number of mapped base pointers.");
8872
8873 // Choose a base pointer index which is always valid:
8874 const ValueDecl *RelevantVD =
8875 L.Components.back().getAssociatedDeclaration();
8876 assert(RelevantVD &&
8877 "No relevant declaration related with device pointer??");
8878
8879 // If StructBaseCurInfo has been updated this iteration then work on
8880 // the first new entry added to it i.e. make sure that when multiple
8881 // values are added to any of the lists, the first value added is
8882 // being modified by the assignments below (not the last value
8883 // added).
8884 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8885 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8886 RelevantVD;
8887 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8888 L.ForDeviceAddr ? DeviceInfoTy::Address
8889 : DeviceInfoTy::Pointer;
8890 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8891 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8892 } else {
8893 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8894 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8895 L.ForDeviceAddr ? DeviceInfoTy::Address
8896 : DeviceInfoTy::Pointer;
8897 CurInfo.Types[CurrentBasePointersIdx] |=
8898 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8899 }
8900 }
8901 }
8902 }
8903
8904 // Append any pending zero-length pointers which are struct members and
8905 // are used with use_device_ptr or use_device_addr.
8906 auto CI = DeferredInfo.find(Data.first);
8907 if (CI != DeferredInfo.end()) {
8908 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8909 llvm::Value *BasePtr;
8910 llvm::Value *Ptr;
8911 if (L.ForDeviceAddr) {
8912 if (L.IE->isGLValue())
8913 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8914 else
8915 Ptr = this->CGF.EmitScalarExpr(L.IE);
8916 BasePtr = Ptr;
8917 // Entry is RETURN_PARAM. Also, set the placeholder value
8918 // MEMBER_OF=FFFF so that the entry is later updated with the
8919 // correct value of MEMBER_OF.
8920 CurInfo.Types.push_back(
8921 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8922 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8923 } else {
8924 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8925 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8926 L.IE->getExprLoc());
8927 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8928 // placeholder value MEMBER_OF=FFFF so that the entry is later
8929 // updated with the correct value of MEMBER_OF.
8930 CurInfo.Types.push_back(
8931 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8932 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8933 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8934 }
8935 CurInfo.Exprs.push_back(L.VD);
8936 CurInfo.BasePointers.emplace_back(BasePtr);
8937 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8938 CurInfo.DevicePointers.emplace_back(
8939 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8940 CurInfo.Pointers.push_back(Ptr);
8941 CurInfo.Sizes.push_back(
8942 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8943 CurInfo.Mappers.push_back(nullptr);
8944 }
8945 }
8946
8947 // Unify entries in one list, making sure the struct mapping precedes the
8948 // individual fields:
8949 MapCombinedInfoTy UnionCurInfo;
8950 UnionCurInfo.append(StructBaseCurInfo);
8951 UnionCurInfo.append(CurInfo);
8952
8953 // If there is an entry in PartialStruct it means we have a struct with
8954 // individual members mapped. Emit an extra combined entry.
8955 if (PartialStruct.Base.isValid()) {
8956 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8957 // Emit a combined entry:
8958 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8959 /*IsMapThis*/ !VD, OMPBuilder, VD);
8960 }
8961
8962 // We need to append the results of this capture to what we already have.
8963 CombinedInfo.append(UnionCurInfo);
8964 }
8965 // Append data for use_device_ptr clauses.
8966 CombinedInfo.append(UseDeviceDataCombinedInfo);
8967 }
8968
8969public:
8970 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8971 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
8972 // Extract firstprivate clause information.
8973 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8974 for (const auto *D : C->varlist())
8975 FirstPrivateDecls.try_emplace(
8976 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8977 // Extract implicit firstprivates from uses_allocators clauses.
8978 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8979 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8980 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8981 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8982 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8983 /*Implicit=*/true);
8984 else if (const auto *VD = dyn_cast<VarDecl>(
8985 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8986 ->getDecl()))
8987 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8988 }
8989 }
8990 // Extract device pointer clause information.
8991 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8992 for (auto L : C->component_lists())
8993 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8994 // Extract device addr clause information.
8995 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8996 for (auto L : C->component_lists())
8997 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8998 // Extract map information.
8999 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
9000 if (C->getMapType() != OMPC_MAP_to)
9001 continue;
9002 for (auto L : C->component_lists()) {
9003 const ValueDecl *VD = std::get<0>(L);
9004 const auto *RD = VD ? VD->getType()
9005 .getCanonicalType()
9006 .getNonReferenceType()
9007 ->getAsCXXRecordDecl()
9008 : nullptr;
9009 if (RD && RD->isLambda())
9010 LambdasMap.try_emplace(std::get<0>(L), C);
9011 }
9012 }
9013 }
9014
9015 /// Constructor for the declare mapper directive.
9016 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
9017 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9018
9019 /// Generate code for the combined entry if we have a partially mapped struct
9020 /// and take care of the mapping flags of the arguments corresponding to
9021 /// individual struct members.
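/// For example (illustrative):
/// \code
/// struct S { int a; double b; } s;
/// #pragma omp target map(tofrom: s.a, s.b)
/// \endcode
/// Each member produces its own entry, and this routine emits one extra parent
/// entry spanning from the lowest mapped member to one past the highest; the
/// member entries are then marked MEMBER_OF that parent entry.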
9022 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
9023 MapFlagsArrayTy &CurTypes,
9024 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
9025 llvm::OpenMPIRBuilder &OMPBuilder,
9026 const ValueDecl *VD = nullptr,
9027 unsigned OffsetForMemberOfFlag = 0,
9028 bool NotTargetParams = true) const {
9029 if (CurTypes.size() == 1 &&
9030 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
9031 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
9032 !PartialStruct.IsArraySection)
9033 return;
9034 Address LBAddr = PartialStruct.LowestElem.second;
9035 Address HBAddr = PartialStruct.HighestElem.second;
9036 if (PartialStruct.HasCompleteRecord) {
9037 LBAddr = PartialStruct.LB;
9038 HBAddr = PartialStruct.LB;
9039 }
9040 CombinedInfo.Exprs.push_back(VD);
9041 // Base is the base of the struct
9042 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9043 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9044 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9045 // Pointer is the address of the lowest element
9046 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
9047 const CXXMethodDecl *MD =
9048 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
9049 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
9050 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
9051 // There should not be a mapper for a combined entry.
9052 if (HasBaseClass) {
9053 // OpenMP 5.2 148:21:
9054 // If the target construct is within a class non-static member function,
9055 // and a variable is an accessible data member of the object for which the
9056 // non-static member function is invoked, the variable is treated as
9057 // if the this[:1] expression had appeared in a map clause with a map-type
9058 // of tofrom.
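// Illustrative case: for 'struct T : Base { int X; void f(); }', a
// '#pragma omp target map(X)' inside f() transfers the complete object
// through 'this', as if 'map(tofrom: this[:1])' had been written.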
9059 // Emit this[:1]
9060 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9061 QualType Ty = MD->getFunctionObjectParameterType();
9062 llvm::Value *Size =
9063 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
9064 /*isSigned=*/true);
9065 CombinedInfo.Sizes.push_back(Size);
9066 } else {
9067 CombinedInfo.Pointers.push_back(LB);
9068 // Size is (addr of {highest+1} element) - (addr of lowest element)
9069 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
9070 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
9071 HBAddr.getElementType(), HB, /*Idx0=*/1);
9072 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
9073 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
9074 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
9075 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9076 /*isSigned=*/false);
9077 CombinedInfo.Sizes.push_back(Size);
9078 }
9079 CombinedInfo.Mappers.push_back(nullptr);
9080 // The map type is always TARGET_PARAM when generating info for captures.
9081 CombinedInfo.Types.push_back(
9082 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
9083 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
9084 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
9085 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9086 // If any element has the present modifier, then make sure the runtime
9087 // doesn't attempt to allocate the struct.
9088 if (CurTypes.end() !=
9089 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9090 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9091 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9092 }))
9093 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
9094 // Remove TARGET_PARAM flag from the first element
9095 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9096 // If any element has the ompx_hold modifier, then make sure the runtime
9097 // uses the hold reference count for the struct as a whole so that it won't
9098 // be unmapped by an extra dynamic reference count decrement. Add it to all
9099 // elements as well so the runtime knows which reference count to check
9100 // when determining whether it's time for device-to-host transfers of
9101 // individual elements.
9102 if (CurTypes.end() !=
9103 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9104 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9105 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
9106 })) {
9107 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9108 for (auto &M : CurTypes)
9109 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9110 }
9111
9112 // All other current entries will be MEMBER_OF the combined entry
9113 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9114 // 0xFFFF in the MEMBER_OF field).
9115 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
9116 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
9117 for (auto &M : CurTypes)
9118 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
9119 }
9120
9121 /// Generate all the base pointers, section pointers, sizes, map types, and
9122 /// mappers for the extracted mappable expressions (all included in \a
9123 /// CombinedInfo). Also, for each item that relates to a device pointer, a
9124 /// pair of the relevant declaration and index where it occurs is appended to
9125 /// the device pointers info array.
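/// (Note: the arrays in \a CombinedInfo are parallel; entry i of BasePointers,
/// Pointers, Sizes, Types and Mappers together describes the i-th map
/// argument handed to the offloading runtime.)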
9126 void generateAllInfo(
9127 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9128 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9129 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9130 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9131 "Expect a executable directive");
9132 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9133 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9134 SkipVarSet);
9135 }
9136
9137 /// Generate all the base pointers, section pointers, sizes, map types, and
9138 /// mappers for the extracted map clauses of user-defined mapper (all included
9139 /// in \a CombinedInfo).
9140 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9141 llvm::OpenMPIRBuilder &OMPBuilder) const {
9142 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9143 "Expect a declare mapper directive");
9144 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
9145 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
9146 OMPBuilder);
9147 }
9148
9149 /// Emit capture info for lambdas for variables captured by reference.
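/// For example (illustrative):
/// \code
/// int x = 0;
/// auto l = [&x]() { return x; };
/// #pragma omp target map(to: l)
/// \endcode
/// The capture field of 'l' holding &x is mapped PTR_AND_OBJ | MEMBER_OF so
/// that the device copy of the lambda points at the device copy of 'x'.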
9150 void generateInfoForLambdaCaptures(
9151 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9152 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9153 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9154 const auto *RD = VDType->getAsCXXRecordDecl();
9155 if (!RD || !RD->isLambda())
9156 return;
9157 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9158 CGF.getContext().getDeclAlign(VD));
9159 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9160 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9161 FieldDecl *ThisCapture = nullptr;
9162 RD->getCaptureFields(Captures, ThisCapture);
9163 if (ThisCapture) {
9164 LValue ThisLVal =
9165 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9166 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9167 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9168 VDLVal.getPointer(CGF));
9169 CombinedInfo.Exprs.push_back(VD);
9170 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9171 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9172 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9173 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9174 CombinedInfo.Sizes.push_back(
9175 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9176 CGF.Int64Ty, /*isSigned=*/true));
9177 CombinedInfo.Types.push_back(
9178 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9179 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9180 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9181 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9182 CombinedInfo.Mappers.push_back(nullptr);
9183 }
9184 for (const LambdaCapture &LC : RD->captures()) {
9185 if (!LC.capturesVariable())
9186 continue;
9187 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
9188 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9189 continue;
9190 auto It = Captures.find(VD);
9191 assert(It != Captures.end() && "Found lambda capture without field.");
9192 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9193 if (LC.getCaptureKind() == LCK_ByRef) {
9194 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9195 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9196 VDLVal.getPointer(CGF));
9197 CombinedInfo.Exprs.push_back(VD);
9198 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9199 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9200 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9201 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9202 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9203 CGF.getTypeSize(
9204 VD->getType().getCanonicalType().getNonReferenceType()),
9205 CGF.Int64Ty, /*isSigned=*/true));
9206 } else {
9207 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9208 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9209 VDLVal.getPointer(CGF));
9210 CombinedInfo.Exprs.push_back(VD);
9211 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9212 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9213 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9214 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9215 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9216 }
9217 CombinedInfo.Types.push_back(
9218 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9219 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9220 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9221 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9222 CombinedInfo.Mappers.push_back(nullptr);
9223 }
9224 }
9225
9226 /// Set the correct MEMBER_OF indices for lambda captures.
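/// (Note: each implicit lambda-capture entry locates its parent lambda by
/// matching the address recorded in \a LambdaPointers against earlier Pointers
/// entries, and its MEMBER_OF field is set to that parent's index.)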
9227 void adjustMemberOfForLambdaCaptures(
9228 llvm::OpenMPIRBuilder &OMPBuilder,
9229 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9230 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9231 MapFlagsArrayTy &Types) const {
9232 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9233 // Set correct member_of idx for all implicit lambda captures.
9234 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9235 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9236 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9237 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9238 continue;
9239 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
9240 assert(BasePtr && "Unable to find base lambda address.");
9241 int TgtIdx = -1;
9242 for (unsigned J = I; J > 0; --J) {
9243 unsigned Idx = J - 1;
9244 if (Pointers[Idx] != BasePtr)
9245 continue;
9246 TgtIdx = Idx;
9247 break;
9248 }
9249 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9250 // All other current entries will be MEMBER_OF the combined entry
9251 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9252 // 0xFFFF in the MEMBER_OF field).
9253 OpenMPOffloadMappingFlags MemberOfFlag =
9254 OMPBuilder.getMemberOfFlag(TgtIdx);
9255 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9256 }
9257 }
9258
9259 /// For a capture that has an associated clause, generate the base pointers,
9260 /// section pointers, sizes, map types, and mappers (all included in
9261 /// \a CurCaptureVarInfo).
9262 void generateInfoForCaptureFromClauseInfo(
9263 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
9264 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9265 unsigned OffsetForMemberOfFlag) const {
9266 assert(!Cap->capturesVariableArrayType() &&
9267 "Not expecting to generate map info for a variable array type!");
9268
9269 // We need to know when we are generating information for the first component.
9270 const ValueDecl *VD = Cap->capturesThis()
9271 ? nullptr
9272 : Cap->getCapturedVar()->getCanonicalDecl();
9273
9274 // For map(to: lambda): skip it here; it is processed in
9275 // generateDefaultMapInfo.
9276 if (LambdasMap.count(VD))
9277 return;
9278
9279 // If this declaration appears in a is_device_ptr clause we just have to
9280 // pass the pointer by value. If it is a reference to a declaration, we just
9281 // pass its value.
9282 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
9283 CurCaptureVarInfo.Exprs.push_back(VD);
9284 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
9285 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
9286 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
9287 CurCaptureVarInfo.Pointers.push_back(Arg);
9288 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9289 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9290 /*isSigned=*/true));
9291 CurCaptureVarInfo.Types.push_back(
9292 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9293 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9294 CurCaptureVarInfo.Mappers.push_back(nullptr);
9295 return;
9296 }
9297
9298 MapDataArrayTy DeclComponentLists;
9299 // For member fields listed in is_device_ptr, store them in
9300 // DeclComponentLists for generating component info.
9301 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9302 auto It = DevPointersMap.find(VD);
9303 if (It != DevPointersMap.end())
9304 for (const auto &MCL : It->second)
9305 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9306 /*IsImplicit=*/true, nullptr,
9307 nullptr);
9308 auto I = HasDevAddrsMap.find(VD);
9309 if (I != HasDevAddrsMap.end())
9310 for (const auto &MCL : I->second)
9311 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9312 /*IsImplicit=*/true, nullptr,
9313 nullptr);
9314 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9315 "Expect a executable directive");
9316 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9317 bool HasMapBasePtr = false;
9318 bool HasMapArraySec = false;
9319 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9320 const auto *EI = C->getVarRefs().begin();
9321 for (const auto L : C->decl_component_lists(VD)) {
9322 const ValueDecl *VDecl, *Mapper;
9323 // The expression is not correct if the mapping is implicit.
9324 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9325 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9326 std::tie(VDecl, Components, Mapper) = L;
9327 assert(VDecl == VD && "We got information for the wrong declaration??");
9328 assert(!Components.empty() &&
9329 "Not expecting declaration with no component lists.");
9330 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
9331 HasMapBasePtr = true;
9332 if (VD && E && VD->getType()->isAnyPointerType() &&
9333 isa<ArraySectionExpr, ArraySubscriptExpr>(E))
9334 HasMapArraySec = true;
9335 DeclComponentLists.emplace_back(Components, C->getMapType(),
9336 C->getMapTypeModifiers(),
9337 C->isImplicit(), Mapper, E);
9338 ++EI;
9339 }
9340 }
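// Order the component lists so that lists whose map type carries the
// 'present' modifier are processed first and plain 'alloc' lists sink to the
// end.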
9341 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9342 const MapData &RHS) {
9343 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9344 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9345 bool HasPresent =
9346 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9347 bool HasAllocs = MapType == OMPC_MAP_alloc;
9348 MapModifiers = std::get<2>(RHS);
9349 MapType = std::get<1>(LHS);
9350 bool HasPresentR =
9351 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9352 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9353 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9354 });
9355
9356 auto GenerateInfoForComponentLists =
9357 [&](ArrayRef<MapData> DeclComponentLists,
9358 bool IsEligibleForTargetParamFlag) {
9359 MapCombinedInfoTy CurInfoForComponentLists;
9360 StructRangeInfoTy PartialStruct;
9361
9362 if (DeclComponentLists.empty())
9363 return;
9364
9365 generateInfoForCaptureFromComponentLists(
9366 VD, DeclComponentLists, CurInfoForComponentLists, PartialStruct,
9367 IsEligibleForTargetParamFlag,
9368 /*AreBothBasePtrAndPteeMapped=*/HasMapBasePtr && HasMapArraySec);
9369
9370 // If there is an entry in PartialStruct it means we have a
9371 // struct with individual members mapped. Emit an extra combined
9372 // entry.
9373 if (PartialStruct.Base.isValid()) {
9374 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
9375 emitCombinedEntry(
9376 CurCaptureVarInfo, CurInfoForComponentLists.Types,
9377 PartialStruct, Cap->capturesThis(), OMPBuilder, nullptr,
9378 OffsetForMemberOfFlag,
9379 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
9380 }
9381
9382 // Return if we didn't add any entries.
9383 if (CurInfoForComponentLists.BasePointers.empty())
9384 return;
9385
9386 CurCaptureVarInfo.append(CurInfoForComponentLists);
9387 };
9388
9389 GenerateInfoForComponentLists(DeclComponentLists,
9390 /*IsEligibleForTargetParamFlag=*/true);
9391 }
9392
9393 /// Generate the base pointers, section pointers, sizes, map types, and
9394 /// mappers associated with \a DeclComponentLists for a given capture
9395 /// \a VD (all included in \a CurComponentListInfo).
9396 void generateInfoForCaptureFromComponentLists(
9397 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
9398 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
9399 bool IsListEligibleForTargetParamFlag,
9400 bool AreBothBasePtrAndPteeMapped = false) const {
9401 // Find overlapping elements (including the offset from the base element).
9402 llvm::SmallDenseMap<
9403 const MapData *,
9404 llvm::SmallVector<
9405 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9406 4>
9407 OverlappedData;
9408 size_t Count = 0;
9409 for (const MapData &L : DeclComponentLists) {
9410 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9411 OpenMPMapClauseKind MapType;
9412 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9413 bool IsImplicit;
9414 const ValueDecl *Mapper;
9415 const Expr *VarRef;
9416 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9417 L;
9418 ++Count;
9419 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
9420 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9421 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9422 VarRef) = L1;
9423 auto CI = Components.rbegin();
9424 auto CE = Components.rend();
9425 auto SI = Components1.rbegin();
9426 auto SE = Components1.rend();
9427 for (; CI != CE && SI != SE; ++CI, ++SI) {
9428 if (CI->getAssociatedExpression()->getStmtClass() !=
9429 SI->getAssociatedExpression()->getStmtClass())
9430 break;
9431 // Are we dealing with different variables/fields?
9432 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9433 break;
9434 }
9435 // An overlap is found if, for at least one of the lists, we reached the
9436 // head of its component list.
9437 if (CI == CE || SI == SE) {
9438 // Ignore it if it is the same component.
9439 if (CI == CE && SI == SE)
9440 continue;
9441 const auto It = (SI == SE) ? CI : SI;
9442 // If one component is a pointer and another one is a kind of
9443 // dereference of this pointer (array subscript, section, dereference,
9444 // etc.), it is not an overlap.
9445 // Same, if one component is a base and another component is a
9446 // dereferenced pointer memberexpr with the same base.
9447 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9448 (std::prev(It)->getAssociatedDeclaration() &&
9449 std::prev(It)
9450 ->getAssociatedDeclaration()
9451 ->getType()
9452 ->isPointerType()) ||
9453 (It->getAssociatedDeclaration() &&
9454 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9455 std::next(It) != CE && std::next(It) != SE))
9456 continue;
9457 const MapData &BaseData = CI == CE ? L : L1;
9458 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9459 SI == SE ? Components : Components1;
9460 OverlappedData[&BaseData].push_back(SubData);
9461 }
9462 }
9463 }
9464 // Sort the overlapped elements for each item.
9465 llvm::SmallVector<const FieldDecl *, 4> Layout;
9466 if (!OverlappedData.empty()) {
9467 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9468 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9469 while (BaseType != OrigType) {
9470 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9471 OrigType = BaseType->getPointeeOrArrayElementType();
9472 }
9473
9474 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9475 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9476 else {
9477 const auto *RD = BaseType->getAsRecordDecl();
9478 Layout.append(RD->field_begin(), RD->field_end());
9479 }
9480 }
9481 for (auto &Pair : OverlappedData) {
9482 llvm::stable_sort(
9483 Pair.getSecond(),
9484 [&Layout](
9485 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9486 OMPClauseMappableExprCommon::MappableExprComponentListRef
9487 Second) {
9488 auto CI = First.rbegin();
9489 auto CE = First.rend();
9490 auto SI = Second.rbegin();
9491 auto SE = Second.rend();
9492 for (; CI != CE && SI != SE; ++CI, ++SI) {
9493 if (CI->getAssociatedExpression()->getStmtClass() !=
9494 SI->getAssociatedExpression()->getStmtClass())
9495 break;
9496 // Are we dealing with different variables/fields?
9497 if (CI->getAssociatedDeclaration() !=
9498 SI->getAssociatedDeclaration())
9499 break;
9500 }
9501
9502 // Lists contain the same elements.
9503 if (CI == CE && SI == SE)
9504 return false;
9505
9506 // A list with fewer elements is less than a list with more elements.
9507 if (CI == CE || SI == SE)
9508 return CI == CE;
9509
9510 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9511 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9512 if (FD1->getParent() == FD2->getParent())
9513 return FD1->getFieldIndex() < FD2->getFieldIndex();
9514 const auto *It =
9515 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9516 return FD == FD1 || FD == FD2;
9517 });
9518 return *It == FD1;
9519 });
9520 }
9521
9522 // Associated with a capture, because the mapping flags depend on it.
9523 // First go through all of the elements that have overlapped elements.
9524 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
9525 MapCombinedInfoTy StructBaseCombinedInfo;
9526 for (const auto &Pair : OverlappedData) {
9527 const MapData &L = *Pair.getFirst();
9528 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9529 OpenMPMapClauseKind MapType;
9530 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9531 bool IsImplicit;
9532 const ValueDecl *Mapper;
9533 const Expr *VarRef;
9534 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9535 L;
9536 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9537 OverlappedComponents = Pair.getSecond();
9538 generateInfoForComponentList(
9539 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9540 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag, IsImplicit,
9541 /*GenerateAllInfoForClauses*/ false, Mapper,
9542 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9543 AddTargetParamFlag = false;
9544 }
9545 // Go through other elements without overlapped elements.
9546 for (const MapData &L : DeclComponentLists) {
9547 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9548 OpenMPMapClauseKind MapType;
9549 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9550 bool IsImplicit;
9551 const ValueDecl *Mapper;
9552 const Expr *VarRef;
9553 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9554 L;
9555 auto It = OverlappedData.find(&L);
9556 if (It == OverlappedData.end())
9557 generateInfoForComponentList(
9558 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9559 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag,
9560 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
9561 /*ForDeviceAddr=*/false, VD, VarRef,
9562 /*OverlappedElements*/ {}, AreBothBasePtrAndPteeMapped);
9563 AddTargetParamFlag = false;
9564 }
9565 }
9566
9567 /// Generate the default map information for a given capture \a CI,
9568 /// record field declaration \a RI and captured value \a CV.
9569 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9570 const FieldDecl &RI, llvm::Value *CV,
9571 MapCombinedInfoTy &CombinedInfo) const {
9572 bool IsImplicit = true;
9573 // Do the default mapping.
9574 if (CI.capturesThis()) {
9575 CombinedInfo.Exprs.push_back(nullptr);
9576 CombinedInfo.BasePointers.push_back(CV);
9577 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9578 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9579 CombinedInfo.Pointers.push_back(CV);
9580 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9581 CombinedInfo.Sizes.push_back(
9582 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9583 CGF.Int64Ty, /*isSigned=*/true));
9584 // Default map type.
9585 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
9586 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
9587 } else if (CI.capturesVariableByCopy()) {
9588 const VarDecl *VD = CI.getCapturedVar();
9589 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9590 CombinedInfo.BasePointers.push_back(CV);
9591 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9592 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9593 CombinedInfo.Pointers.push_back(CV);
9594 if (!RI.getType()->isAnyPointerType()) {
9595 // We have to signal to the runtime which captures are passed by value
9596 // and are not pointers.
9597 CombinedInfo.Types.push_back(
9598 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
9599 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9600 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9601 } else {
9602 // Pointers are implicitly mapped with a zero size and no flags
9603 // (other than the first map, which is added for all implicit maps).
9604 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
9605 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9606 }
9607 auto I = FirstPrivateDecls.find(VD);
9608 if (I != FirstPrivateDecls.end())
9609 IsImplicit = I->getSecond();
9610 } else {
9611 assert(CI.capturesVariable() && "Expected captured reference.");
9612 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9613 QualType ElementType = PtrTy->getPointeeType();
9614 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9615 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9616 // The default map type for a scalar/complex type is 'to' because by
9617 // default the value doesn't have to be retrieved. For an aggregate
9618 // type, the default is 'tofrom'.
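// Illustrative defaults: a 'double d' captured by reference behaves like
// 'map(to: d)', while a 'struct S s' behaves like 'map(tofrom: s)'.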
9619 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9620 const VarDecl *VD = CI.getCapturedVar();
9621 auto I = FirstPrivateDecls.find(VD);
9622 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9623 CombinedInfo.BasePointers.push_back(CV);
9624 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9625 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9626 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9627 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9628 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9629 AlignmentSource::Decl));
9630 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
9631 } else {
9632 CombinedInfo.Pointers.push_back(CV);
9633 }
9634 if (I != FirstPrivateDecls.end())
9635 IsImplicit = I->getSecond();
9636 }
9637 // Every default map produces a single argument which is a target parameter.
9638 CombinedInfo.Types.back() |=
9639 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9640
9641 // Add flag stating this is an implicit map.
9642 if (IsImplicit)
9643 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
9644
9645 // No user-defined mapper for default mapping.
9646 CombinedInfo.Mappers.push_back(nullptr);
9647 }
9648};
9649} // anonymous namespace
9650
9651// Try to extract the base declaration from a `this->x` expression if possible.
9652static const ValueDecl *getDeclFromThisExpr(const Expr *E) {
9653 if (!E)
9654 return nullptr;
9655
9656 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
9657 if (const MemberExpr *ME =
9658 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9659 return ME->getMemberDecl();
9660 return nullptr;
9661}
9662
9663/// Emit a string constant containing the names of the values mapped to the
9664/// offloading runtime library.
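/// The result follows the usual source-location string layout, e.g.
/// ";file.c;arr;3;7;;" for a list item 'arr' declared at line 3, column 7
/// (illustrative values).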
9665static llvm::Constant *
9666emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9667 MappableExprsHandler::MappingExprInfo &MapExprs) {
9668
9669 uint32_t SrcLocStrSize;
9670 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9671 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9672
9673 SourceLocation Loc;
9674 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9675 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9676 Loc = VD->getLocation();
9677 else
9678 Loc = MapExprs.getMapExpr()->getExprLoc();
9679 } else {
9680 Loc = MapExprs.getMapDecl()->getLocation();
9681 }
9682
9683 std::string ExprName;
9684 if (MapExprs.getMapExpr()) {
9685 PrintingPolicy P(CGF.getContext().getLangOpts());
9686 llvm::raw_string_ostream OS(ExprName);
9687 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9688 } else {
9689 ExprName = MapExprs.getMapDecl()->getNameAsString();
9690 }
9691
9692 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9692 std::string FileName;
9694 if (auto *DbgInfo = CGF.getDebugInfo())
9695 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
9696 else
9697 FileName = PLoc.getFilename();
9698 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
9699 PLoc.getColumn(), SrcLocStrSize);
9700}
9701/// Emit the arrays used to pass the captures and map information to the
9702/// offloading runtime library. If there is no map or capture information,
9703/// return nullptr by reference.
9704static void emitOffloadingArraysAndArgs(
9705 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9706 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9707 bool IsNonContiguous = false, bool ForEndCall = false) {
9708 CodeGenModule &CGM = CGF.CGM;
9709
9710 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
9711 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
9712 CGF.AllocaInsertPt->getIterator());
9713 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
9714 CGF.Builder.GetInsertPoint());
9715
9716 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
9717 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
9718 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
9719 }
9720 };
9721
9722 auto CustomMapperCB = [&](unsigned int I) {
9723 llvm::Function *MFunc = nullptr;
9724 if (CombinedInfo.Mappers[I]) {
9725 Info.HasMapper = true;
9726 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9727 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9728 }
9729 return MFunc;
9730 };
9731 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
9732 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
9733 IsNonContiguous, ForEndCall, DeviceAddrCB));
9734}
9735
9736/// Check for inner distribute directive.
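/// For example (illustrative): for a '#pragma omp target' whose body is a
/// '#pragma omp teams distribute' loop nest, this returns the nested
/// distribute directive so the kernel's loop trip count can be precomputed.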
9737static const OMPExecutableDirective *
9738getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9739 const auto *CS = D.getInnermostCapturedStmt();
9740 const auto *Body =
9741 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9742 const Stmt *ChildStmt =
9743 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9744
9745 if (const auto *NestedDir =
9746 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9747 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9748 switch (D.getDirectiveKind()) {
9749 case OMPD_target:
9750 // For now, treat 'target' with nested 'teams loop' as if it's
9751 // distributed (target teams distribute).
9752 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
9753 return NestedDir;
9754 if (DKind == OMPD_teams) {
9755 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9756 /*IgnoreCaptured=*/true);
9757 if (!Body)
9758 return nullptr;
9759 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9760 if (const auto *NND =
9761 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9762 DKind = NND->getDirectiveKind();
9763 if (isOpenMPDistributeDirective(DKind))
9764 return NND;
9765 }
9766 }
9767 return nullptr;
9768 case OMPD_target_teams:
9769 if (isOpenMPDistributeDirective(DKind))
9770 return NestedDir;
9771 return nullptr;
9772 case OMPD_target_parallel:
9773 case OMPD_target_simd:
9774 case OMPD_target_parallel_for:
9775 case OMPD_target_parallel_for_simd:
9776 return nullptr;
9777 case OMPD_target_teams_distribute:
9778 case OMPD_target_teams_distribute_simd:
9779 case OMPD_target_teams_distribute_parallel_for:
9780 case OMPD_target_teams_distribute_parallel_for_simd:
9781 case OMPD_parallel:
9782 case OMPD_for:
9783 case OMPD_parallel_for:
9784 case OMPD_parallel_master:
9785 case OMPD_parallel_sections:
9786 case OMPD_for_simd:
9787 case OMPD_parallel_for_simd:
9788 case OMPD_cancel:
9789 case OMPD_cancellation_point:
9790 case OMPD_ordered:
9791 case OMPD_threadprivate:
9792 case OMPD_allocate:
9793 case OMPD_task:
9794 case OMPD_simd:
9795 case OMPD_tile:
9796 case OMPD_unroll:
9797 case OMPD_sections:
9798 case OMPD_section:
9799 case OMPD_single:
9800 case OMPD_master:
9801 case OMPD_critical:
9802 case OMPD_taskyield:
9803 case OMPD_barrier:
9804 case OMPD_taskwait:
9805 case OMPD_taskgroup:
9806 case OMPD_atomic:
9807 case OMPD_flush:
9808 case OMPD_depobj:
9809 case OMPD_scan:
9810 case OMPD_teams:
9811 case OMPD_target_data:
9812 case OMPD_target_exit_data:
9813 case OMPD_target_enter_data:
9814 case OMPD_distribute:
9815 case OMPD_distribute_simd:
9816 case OMPD_distribute_parallel_for:
9817 case OMPD_distribute_parallel_for_simd:
9818 case OMPD_teams_distribute:
9819 case OMPD_teams_distribute_simd:
9820 case OMPD_teams_distribute_parallel_for:
9821 case OMPD_teams_distribute_parallel_for_simd:
9822 case OMPD_target_update:
9823 case OMPD_declare_simd:
9824 case OMPD_declare_variant:
9825 case OMPD_begin_declare_variant:
9826 case OMPD_end_declare_variant:
9827 case OMPD_declare_target:
9828 case OMPD_end_declare_target:
9829 case OMPD_declare_reduction:
9830 case OMPD_declare_mapper:
9831 case OMPD_taskloop:
9832 case OMPD_taskloop_simd:
9833 case OMPD_master_taskloop:
9834 case OMPD_master_taskloop_simd:
9835 case OMPD_parallel_master_taskloop:
9836 case OMPD_parallel_master_taskloop_simd:
9837 case OMPD_requires:
9838 case OMPD_metadirective:
9839 case OMPD_unknown:
9840 default:
9841 llvm_unreachable("Unexpected directive.");
9842 }
9843 }
9844
9845 return nullptr;
9846}
9847
9848/// Emit the user-defined mapper function. The code generation follows the
9849/// pattern in the example below.
9850/// \code
9851/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9852/// void *base, void *begin,
9853/// int64_t size, int64_t type,
9854/// void *name = nullptr) {
9855/// // Allocate space for an array section first or add a base/begin for
9856/// // pointer dereference.
9857/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9858/// !maptype.IsDelete)
9859/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9860/// size*sizeof(Ty), clearToFromMember(type));
9861/// // Map members.
9862/// for (unsigned i = 0; i < size; i++) {
9863/// // For each component specified by this mapper:
9864/// for (auto c : begin[i]->all_components) {
9865/// if (c.hasMapper())
9866/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9867/// c.arg_type, c.arg_name);
9868/// else
9869/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9870/// c.arg_begin, c.arg_size, c.arg_type,
9871/// c.arg_name);
9872/// }
9873/// }
9874/// // Delete the array section.
9875/// if (size > 1 && maptype.IsDelete)
9876/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9877/// size*sizeof(Ty), clearToFromMember(type));
9878/// }
9879/// \endcode
9880void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9881 CodeGenFunction *CGF) {
9882 if (UDMMap.count(D) > 0)
9883 return;
9884 ASTContext &C = CGM.getContext();
9885 QualType Ty = D->getType();
9886 auto *MapperVarDecl =
9887 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9888 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9889 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9890
9891 CodeGenFunction MapperCGF(CGM);
9892 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9893 auto PrivatizeAndGenMapInfoCB =
9894 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
9895 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
9896 MapperCGF.Builder.restoreIP(CodeGenIP);
9897
9898 // Privatize the declared variable of mapper to be the current array
9899 // element.
9900 Address PtrCurrent(
9901 PtrPHI, ElemTy,
9902 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
9903 .getAlignment()
9904 .alignmentOfArrayElement(ElementSize));
9905 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9906 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9907 (void)Scope.Privatize();
9908
9909 // Get map clause information.
9910 MappableExprsHandler MEHandler(*D, MapperCGF);
9911 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
9912
9913 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9914 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
9915 };
9916 if (CGM.getCodeGenOpts().getDebugInfo() !=
9917 llvm::codegenoptions::NoDebugInfo) {
9918 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9919 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9920 FillInfoMap);
9921 }
9922
9923 return CombinedInfo;
9924 };
9925
9926 auto CustomMapperCB = [&](unsigned I) {
9927 llvm::Function *MapperFunc = nullptr;
9928 if (CombinedInfo.Mappers[I]) {
9929 // Call the corresponding mapper function.
9930 MapperFunc = getOrCreateUserDefinedMapperFunc(
9931 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9932 assert(MapperFunc && "Expect a valid mapper function to be available.");
9933 }
9934 return MapperFunc;
9935 };
9936
9937 SmallString<64> TyStr;
9938 llvm::raw_svector_ostream Out(TyStr);
9939 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9940 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9941
9942 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
9943 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
9944 UDMMap.try_emplace(D, NewFn);
9945 if (CGF)
9946 FunctionUDMMap[CGF->CurFn].push_back(D);
9947}
9948
9949llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9950 const OMPDeclareMapperDecl *D) {
9951 auto I = UDMMap.find(D);
9952 if (I != UDMMap.end())
9953 return I->second;
9954 emitUserDefinedMapper(D);
9955 return UDMMap.lookup(D);
9956}
9957
9958llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9959 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9960 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9961 const OMPLoopDirective &D)>
9962 SizeEmitter) {
9963 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9964 const OMPExecutableDirective *TD = &D;
9965 // Get nested teams distribute kind directive, if any. For now, treat
9966 // 'target_teams_loop' as if it's really a target_teams_distribute.
9967 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9968 Kind != OMPD_target_teams_loop)
9969 TD = getNestedDistributeDirective(CGM.getContext(), D);
9970 if (!TD)
9971 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9972
9973 const auto *LD = cast<OMPLoopDirective>(TD);
9974 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9975 return NumIterations;
9976 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9977}
9978
9979static void
9980emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9981 const OMPExecutableDirective &D,
9982 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9983 bool RequiresOuterTask, const CapturedStmt &CS,
9984 bool OffloadingMandatory, CodeGenFunction &CGF) {
9985 if (OffloadingMandatory) {
9986 CGF.Builder.CreateUnreachable();
9987 } else {
9988 if (RequiresOuterTask) {
9989 CapturedVars.clear();
9990 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9991 }
9992 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9993 CapturedVars);
9994 }
9995}
9996
9997static llvm::Value *emitDeviceID(
9998 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9999 CodeGenFunction &CGF) {
10000 // Emit device ID if any.
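// Illustrative: '#pragma omp target device(2)' yields an i64 constant 2 here,
// while a directive without a device clause falls back to OMP_DEVICEID_UNDEF.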
10001 llvm::Value *DeviceID;
10002 if (Device.getPointer()) {
10003 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10004 Device.getInt() == OMPC_DEVICE_device_num) &&
10005 "Expected device_num modifier.");
10006 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10007 DeviceID =
10008 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10009 } else {
10010 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10011 }
10012 return DeviceID;
10013}
10014
10015static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
10016 CodeGenFunction &CGF) {
10017 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
10018
10019 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
10020 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
10021 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
10022 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
10023 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
10024 /*isSigned=*/false);
10025 }
10026 return DynCGroupMem;
10027}
10028static void genMapInfoForCaptures(
10029 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10030 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10031 llvm::OpenMPIRBuilder &OMPBuilder,
10032 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
10033 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10034
10035 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10036 auto RI = CS.getCapturedRecordDecl()->field_begin();
10037 auto *CV = CapturedVars.begin();
10038 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10039 CE = CS.capture_end();
10040 CI != CE; ++CI, ++RI, ++CV) {
10041 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10042
10043 // VLA sizes are passed to the outlined region by copy and do not have map
10044 // information associated.
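// Illustrative: in 'void f(int n) { int a[n]; ... }' the VLA bound 'n' is such
// a capture; it is passed as a literal argument rather than mapped memory.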
10045 if (CI->capturesVariableArrayType()) {
10046 CurInfo.Exprs.push_back(nullptr);
10047 CurInfo.BasePointers.push_back(*CV);
10048 CurInfo.DevicePtrDecls.push_back(nullptr);
10049 CurInfo.DevicePointers.push_back(
10050 MappableExprsHandler::DeviceInfoTy::None);
10051 CurInfo.Pointers.push_back(*CV);
10052 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10053 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10054 // Copy to the device as an argument. No need to retrieve it.
10055 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10056 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10057 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10058 CurInfo.Mappers.push_back(nullptr);
10059 } else {
10060 // If we have any information in the map clause, we use it, otherwise we
10061 // just do a default mapping.
10062 MEHandler.generateInfoForCaptureFromClauseInfo(
10063 CI, *CV, CurInfo, OMPBuilder,
10064 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
10065
10066 if (!CI->capturesThis())
10067 MappedVarSet.insert(CI->getCapturedVar());
10068 else
10069 MappedVarSet.insert(nullptr);
10070
10071 if (CurInfo.BasePointers.empty())
10072 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10073
10074 // Generate correct mapping for variables captured by reference in
10075 // lambdas.
10076 if (CI->capturesVariable())
10077 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10078 CurInfo, LambdaPointers);
10079 }
10080 // We expect to have at least an element of information for this capture.
10081 assert(!CurInfo.BasePointers.empty() &&
10082 "Non-existing map pointer for capture!");
10083 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10084 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10085 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10086 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10087 "Inconsistent map information sizes!");
10088
10089 // We need to append the results of this capture to what we already have.
10090 CombinedInfo.append(CurInfo);
10091 }
10092 // Adjust MEMBER_OF flags for the lambdas captures.
10093 MEHandler.adjustMemberOfForLambdaCaptures(
10094 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
10095 CombinedInfo.Pointers, CombinedInfo.Types);
10096}
10097static void
10098genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10099 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10100 llvm::OpenMPIRBuilder &OMPBuilder,
10101 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10102 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10103
10104 CodeGenModule &CGM = CGF.CGM;
10105 // Map any list items in a map clause that were not captured because they
10106 // weren't referenced within the construct.
10107 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
10108
10109 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10110 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10111 };
10112 if (CGM.getCodeGenOpts().getDebugInfo() !=
10113 llvm::codegenoptions::NoDebugInfo) {
10114 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10115 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10116 FillInfoMap);
10117 }
10118}
10119
10120static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
10121 const CapturedStmt &CS,
10122 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10123 llvm::OpenMPIRBuilder &OMPBuilder,
10124 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10125 // Get mappable expression information.
10126 MappableExprsHandler MEHandler(D, CGF);
10127 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10128
10129 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10130 MappedVarSet, CombinedInfo);
10131 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
10132}
10133
10134template <typename ClauseTy>
10135static void
10136emitClauseForBareTargetDirective(CodeGenFunction &CGF,
10137 const OMPExecutableDirective &D,
10138 llvm::SmallVectorImpl<llvm::Value *> &Values) {
10139 const auto *C = D.getSingleClause<ClauseTy>();
10140 assert(!C->varlist_empty() &&
10141 "ompx_bare requires explicit num_teams and thread_limit");
10143 for (auto *E : C->varlist()) {
10144 llvm::Value *V = CGF.EmitScalarExpr(E);
10145 Values.push_back(
10146 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
10147 }
10148}
10149
10150static void emitTargetCallKernelLaunch(
10151 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10152 const OMPExecutableDirective &D,
10153 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10154 const CapturedStmt &CS, bool OffloadingMandatory,
10155 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10156 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10157 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10158 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10159 const OMPLoopDirective &D)>
10160 SizeEmitter,
10161 CodeGenFunction &CGF, CodeGenModule &CGM) {
10162 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10163
10164 // Fill up the arrays with all the captured variables.
10165 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10166 CGOpenMPRuntime::TargetDataInfo Info;
10167 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10168
10169 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10170 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10171
10172 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10173 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10174 CGF.VoidPtrTy, CGM.getPointerAlign());
10175 InputInfo.PointersArray =
10176 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10177 InputInfo.SizesArray =
10178 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10179 InputInfo.MappersArray =
10180 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10181 MapTypesArray = Info.RTArgs.MapTypesArray;
10182 MapNamesArray = Info.RTArgs.MapNamesArray;
10183
10184 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10185 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10186 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10187 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10188 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10189
10190 if (IsReverseOffloading) {
10191 // Reverse offloading is not supported, so just execute on the host.
10192 // FIXME: This fallback solution is incorrect since it ignores the
10193 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10194 // assert here and ensure SEMA emits an error.
10195 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10196 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10197 return;
10198 }
10199
10200 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10201 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10202
10203 llvm::Value *BasePointersArray =
10204 InputInfo.BasePointersArray.emitRawPointer(CGF);
10205 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10206 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10207 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10208
10209 auto &&EmitTargetCallFallbackCB =
10210 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10211 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10212 -> llvm::OpenMPIRBuilder::InsertPointTy {
10213 CGF.Builder.restoreIP(IP);
10214 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10215 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10216 return CGF.Builder.saveIP();
10217 };
10218
10219 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10220 llvm::SmallVector<llvm::Value *, 3> NumTeams;
10221 llvm::SmallVector<llvm::Value *, 3> NumThreads;
10222 if (IsBare) {
10223 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
10224 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
10225 NumThreads);
10226 } else {
10227 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10228 NumThreads.push_back(
10229 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10230 }
10231
10232 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10233 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
10234 llvm::Value *NumIterations =
10235 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10236 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
10237 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10238 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10239
10240 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10241 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10242 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10243
10244 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
10245 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
10246 DynCGGroupMem, HasNoWait);
10247
10248 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10249 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
10250 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
10251 RTLoc, AllocaIP));
10252 CGF.Builder.restoreIP(AfterIP);
10253 };
10254
10255 if (RequiresOuterTask)
10256 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10257 else
10258 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10259}
10260
10261static void
10262emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10263 const OMPExecutableDirective &D,
10264 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10265 bool RequiresOuterTask, const CapturedStmt &CS,
10266 bool OffloadingMandatory, CodeGenFunction &CGF) {
10267
10268 // Notify that the host version must be executed.
10269 auto &&ElseGen =
10270 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10271 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10272 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10273 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10274 };
10275
10276 if (RequiresOuterTask) {
10277 CodeGenFunction::OMPTargetDataInfo InputInfo;
10278 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10279 } else {
10280 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10281 }
10282}
10283 
10284 void CGOpenMPRuntime::emitTargetCall(
10285 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10286 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10287 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10288 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10289 const OMPLoopDirective &D)>
10290 SizeEmitter) {
10291 if (!CGF.HaveInsertPoint())
10292 return;
10293
10294 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
10295 CGM.getLangOpts().OpenMPOffloadMandatory;
10296
10297 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10298
10299 const bool RequiresOuterTask =
10300 D.hasClausesOfKind<OMPDependClause>() ||
10301 D.hasClausesOfKind<OMPNowaitClause>() ||
10302 D.hasClausesOfKind<OMPInReductionClause>() ||
10303 (CGM.getLangOpts().OpenMP >= 51 &&
10304 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
10305 D.hasClausesOfKind<OMPThreadLimitClause>());
10306 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10307 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10308 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10309 PrePostActionTy &) {
10310 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10311 };
10312 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10313 
10314 CodeGenFunction::OMPTargetDataInfo InputInfo;
10315 llvm::Value *MapTypesArray = nullptr;
10316 llvm::Value *MapNamesArray = nullptr;
10317
10318 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
10319 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10320 OutlinedFnID, &InputInfo, &MapTypesArray,
10321 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
10322 PrePostActionTy &) {
10323 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
10324 RequiresOuterTask, CS, OffloadingMandatory,
10325 Device, OutlinedFnID, InputInfo, MapTypesArray,
10326 MapNamesArray, SizeEmitter, CGF, CGM);
10327 };
10328
10329 auto &&TargetElseGen =
10330 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10331 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10332 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
10333 CS, OffloadingMandatory, CGF);
10334 };
10335
10336 // If we have a target function ID it means that we need to support
10337 // offloading; otherwise, just execute on the host. We need to execute on the
10338 // host regardless of the conditional in the if clause if, e.g., the user does
10339 // not specify target triples.
10340 if (OutlinedFnID) {
10341 if (IfCond) {
10342 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10343 } else {
10344 RegionCodeGenTy ThenRCG(TargetThenGen);
10345 ThenRCG(CGF);
10346 }
10347 } else {
10348 RegionCodeGenTy ElseRCG(TargetElseGen);
10349 ElseRCG(CGF);
10350 }
10351}
10352
10353 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10354 StringRef ParentName) {
10355 if (!S)
10356 return;
10357
10358 // Codegen OMP target directives that offload compute to the device.
10359 bool RequiresDeviceCodegen =
10360 isa<OMPExecutableDirective>(S) &&
10361 isOpenMPTargetExecutionDirective(
10362 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10363
10364 if (RequiresDeviceCodegen) {
10365 const auto &E = *cast<OMPExecutableDirective>(S);
10366
10367 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
10368 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
10369
10370 // Is this a target region that should not be emitted as an entry point? If
10371 // so, just signal that we are done with this target region.
10372 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
10373 return;
10374
10375 switch (E.getDirectiveKind()) {
10376 case OMPD_target:
10377 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10378 cast<OMPTargetDirective>(E));
10379 break;
10380 case OMPD_target_parallel:
10381 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10382 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10383 break;
10384 case OMPD_target_teams:
10385 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10386 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10387 break;
10388 case OMPD_target_teams_distribute:
10389 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10390 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10391 break;
10392 case OMPD_target_teams_distribute_simd:
10393 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10394 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10395 break;
10396 case OMPD_target_parallel_for:
10397 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10398 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10399 break;
10400 case OMPD_target_parallel_for_simd:
10401 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10402 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10403 break;
10404 case OMPD_target_simd:
10405 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10406 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10407 break;
10408 case OMPD_target_teams_distribute_parallel_for:
10409 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10410 CGM, ParentName,
10411 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10412 break;
10413 case OMPD_target_teams_distribute_parallel_for_simd:
10414 CodeGenFunction::
10415 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10416 CGM, ParentName,
10417 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10418 break;
10419 case OMPD_target_teams_loop:
10420 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
10421 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
10422 break;
10423 case OMPD_target_parallel_loop:
10424 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
10425 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
10426 break;
10427 case OMPD_parallel:
10428 case OMPD_for:
10429 case OMPD_parallel_for:
10430 case OMPD_parallel_master:
10431 case OMPD_parallel_sections:
10432 case OMPD_for_simd:
10433 case OMPD_parallel_for_simd:
10434 case OMPD_cancel:
10435 case OMPD_cancellation_point:
10436 case OMPD_ordered:
10437 case OMPD_threadprivate:
10438 case OMPD_allocate:
10439 case OMPD_task:
10440 case OMPD_simd:
10441 case OMPD_tile:
10442 case OMPD_unroll:
10443 case OMPD_sections:
10444 case OMPD_section:
10445 case OMPD_single:
10446 case OMPD_master:
10447 case OMPD_critical:
10448 case OMPD_taskyield:
10449 case OMPD_barrier:
10450 case OMPD_taskwait:
10451 case OMPD_taskgroup:
10452 case OMPD_atomic:
10453 case OMPD_flush:
10454 case OMPD_depobj:
10455 case OMPD_scan:
10456 case OMPD_teams:
10457 case OMPD_target_data:
10458 case OMPD_target_exit_data:
10459 case OMPD_target_enter_data:
10460 case OMPD_distribute:
10461 case OMPD_distribute_simd:
10462 case OMPD_distribute_parallel_for:
10463 case OMPD_distribute_parallel_for_simd:
10464 case OMPD_teams_distribute:
10465 case OMPD_teams_distribute_simd:
10466 case OMPD_teams_distribute_parallel_for:
10467 case OMPD_teams_distribute_parallel_for_simd:
10468 case OMPD_target_update:
10469 case OMPD_declare_simd:
10470 case OMPD_declare_variant:
10471 case OMPD_begin_declare_variant:
10472 case OMPD_end_declare_variant:
10473 case OMPD_declare_target:
10474 case OMPD_end_declare_target:
10475 case OMPD_declare_reduction:
10476 case OMPD_declare_mapper:
10477 case OMPD_taskloop:
10478 case OMPD_taskloop_simd:
10479 case OMPD_master_taskloop:
10480 case OMPD_master_taskloop_simd:
10481 case OMPD_parallel_master_taskloop:
10482 case OMPD_parallel_master_taskloop_simd:
10483 case OMPD_requires:
10484 case OMPD_metadirective:
10485 case OMPD_unknown:
10486 default:
10487 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10488 }
10489 return;
10490 }
10491
10492 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10493 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10494 return;
10495
10496 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10497 return;
10498 }
10499
10500 // If this is a lambda function, look into its body.
10501 if (const auto *L = dyn_cast<LambdaExpr>(S))
10502 S = L->getBody();
10503
10504 // Keep looking for target regions recursively.
10505 for (const Stmt *II : S->children())
10506 scanForTargetRegionsFunctions(II, ParentName);
10507}
10508
10509static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10510 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10511 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10512 if (!DevTy)
10513 return false;
10514 // Do not emit device_type(nohost) functions for the host.
10515 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10516 return true;
10517 // Do not emit device_type(host) functions for the device.
10518 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10519 return true;
10520 return false;
10521}
10522
10523 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10524 // If emitting code for the host, we do not process FD here. Instead we do
10525 // the normal code generation.
10526 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
10527 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10528 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10529 CGM.getLangOpts().OpenMPIsTargetDevice))
10530 return true;
10531 return false;
10532 }
10533
10534 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10535 // Try to detect target regions in the function.
10536 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10537 StringRef Name = CGM.getMangledName(GD);
10538 scanForTargetRegionsFunctions(FD->getBody(), Name);
10539 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10540 CGM.getLangOpts().OpenMPIsTargetDevice))
10541 return true;
10542 }
10543
10544 // Do not emit the function if it is not marked as declare target.
10545 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10546 AlreadyEmittedTargetDecls.count(VD) == 0;
10547}
10548 
10549 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10550 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10551 CGM.getLangOpts().OpenMPIsTargetDevice))
10552 return true;
10553
10554 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
10555 return false;
10556
10557 // Check if there are Ctors/Dtors in this declaration and look for target
10558 // regions in it. We use the complete variant to produce the kernel name
10559 // mangling.
10560 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10561 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10562 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10563 StringRef ParentName =
10564 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10565 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10566 }
10567 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10568 StringRef ParentName =
10569 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10570 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10571 }
10572 }
10573
10574 // Do not emit the variable if it is not marked as declare target.
10575 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10576 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10577 cast<VarDecl>(GD.getDecl()));
10578 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10579 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10580 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10581 HasRequiresUnifiedSharedMemory)) {
10582 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10583 return true;
10584 }
10585 return false;
10586}
10587
10588 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10589 llvm::Constant *Addr) {
10590 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10591 !CGM.getLangOpts().OpenMPIsTargetDevice)
10592 return;
10593
10594 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10595 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10596
10597 // If this is an 'extern' declaration we defer to the canonical definition and
10598 // do not emit an offloading entry.
10599 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10600 VD->hasExternalStorage())
10601 return;
10602
10603 if (!Res) {
10604 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10605 // Register non-target variables being emitted in device code (debug info
10606 // may cause this).
10607 StringRef VarName = CGM.getMangledName(VD);
10608 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10609 }
10610 return;
10611 }
10612
10613 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10614 auto LinkageForVariable = [&VD, this]() {
10615 return CGM.getLLVMLinkageVarDefinition(VD);
10616 };
10617
10618 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10619 OMPBuilder.registerTargetGlobalVariable(
10620 convertCaptureClause(VD), convertDeviceClause(VD),
10621 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10622 VD->isExternallyVisible(),
10623 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10624 VD->getCanonicalDecl()->getBeginLoc()),
10625 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10626 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10627 CGM.getTypes().ConvertTypeForMem(
10628 CGM.getContext().getPointerType(VD->getType())),
10629 Addr);
10630
10631 for (auto *ref : GeneratedRefs)
10632 CGM.addCompilerUsedGlobal(ref);
10633}
10634
10635 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10636 if (isa<FunctionDecl>(GD.getDecl()) ||
10637 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10638 return emitTargetFunctions(GD);
10639
10640 return emitTargetGlobalVariable(GD);
10641}
10642
10643 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10644 for (const VarDecl *VD : DeferredGlobalVariables) {
10645 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10646 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10647 if (!Res)
10648 continue;
10649 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10650 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10651 !HasRequiresUnifiedSharedMemory) {
10652 CGM.EmitGlobal(VD);
10653 } else {
10654 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10655 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10656 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10657 HasRequiresUnifiedSharedMemory)) &&
10658 "Expected link clause or to clause with unified memory.");
10659 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10660 }
10661 }
10662}
10663
10664 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10665 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10666 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10667 "Expected target-based directive.");
10668}
10669
10670 void CGOpenMPRuntime::processRequiresDecl(const OMPRequiresDecl *D) {
10671 for (const OMPClause *Clause : D->clauselists()) {
10672 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10673 HasRequiresUnifiedSharedMemory = true;
10674 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10675 } else if (const auto *AC =
10676 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10677 switch (AC->getAtomicDefaultMemOrderKind()) {
10678 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10679 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10680 break;
10681 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10682 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10683 break;
10684 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10685 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10686 break;
10687 default:
10688 break;
10689 }
10690 }
10691 }
10692}
10693
10694llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10695 return RequiresAtomicOrdering;
10696}
10697
10698 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10699 LangAS &AS) {
10700 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10701 return false;
10702 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10703 switch (A->getAllocatorType()) {
10704 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10705 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10706 // Not supported, fallback to the default mem space.
10707 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10708 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10709 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10710 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10711 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10712 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10713 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10714 AS = LangAS::Default;
10715 return true;
10716 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10717 llvm_unreachable("Expected predefined allocator for the variables with the "
10718 "static storage.");
10719 }
10720 return false;
10721}
10722
10723 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10724 return HasRequiresUnifiedSharedMemory;
10725 }
10726 
10727 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10728 CodeGenModule &CGM)
10729 : CGM(CGM) {
10730 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10731 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10732 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10733 }
10734}
10735
10736 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10737 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10738 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10739}
10740
10741 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10742 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10743 return true;
10744
10745 const auto *D = cast<FunctionDecl>(GD.getDecl());
10746 // Do not emit the function if it is marked as declare target, as it was
10747 // already emitted.
10748 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10749 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10750 if (auto *F = dyn_cast_or_null<llvm::Function>(
10751 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10752 return !F->isDeclaration();
10753 return false;
10754 }
10755 return true;
10756 }
10757
10758 return !AlreadyEmittedTargetDecls.insert(D).second;
10759}
10760
10761 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10762 const OMPExecutableDirective &D,
10763 SourceLocation Loc,
10764 llvm::Function *OutlinedFn,
10765 ArrayRef<llvm::Value *> CapturedVars) {
10766 if (!CGF.HaveInsertPoint())
10767 return;
10768
10769 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10770 CodeGenFunction::RunCleanupsScope Scope(CGF);
10771 
10772 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10773 llvm::Value *Args[] = {
10774 RTLoc,
10775 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10776 OutlinedFn};
10777 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10778 RealArgs.append(std::begin(Args), std::end(Args));
10779 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10780
10781 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10782 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10783 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10784}
10785
10786 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10787 const Expr *NumTeams,
10788 const Expr *ThreadLimit,
10789 SourceLocation Loc) {
10790 if (!CGF.HaveInsertPoint())
10791 return;
10792
10793 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10794
10795 llvm::Value *NumTeamsVal =
10796 NumTeams
10797 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10798 CGF.CGM.Int32Ty, /* isSigned = */ true)
10799 : CGF.Builder.getInt32(0);
10800
10801 llvm::Value *ThreadLimitVal =
10802 ThreadLimit
10803 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10804 CGF.CGM.Int32Ty, /* isSigned = */ true)
10805 : CGF.Builder.getInt32(0);
10806
10807 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
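// For example, '#pragma omp teams num_teams(4) thread_limit(8)' lowers to
// __kmpc_push_num_teams(&loc, gtid, 4, 8); an absent clause is passed as 0.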
10808 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10809 ThreadLimitVal};
10810 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10811 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10812 PushNumTeamsArgs);
10813}
10814
10815 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10816 const Expr *ThreadLimit,
10817 SourceLocation Loc) {
10818 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10819 llvm::Value *ThreadLimitVal =
10820 ThreadLimit
10821 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10822 CGF.CGM.Int32Ty, /* isSigned = */ true)
10823 : CGF.Builder.getInt32(0);
10824
10825 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10826 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10827 ThreadLimitVal};
10828 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10829 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10830 ThreadLimitArgs);
10831}
10832
10833 void CGOpenMPRuntime::emitTargetDataCalls(
10834 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10835 const Expr *Device, const RegionCodeGenTy &CodeGen,
10836 CGOpenMPRuntime::TargetDataInfo &Info) {
10837 if (!CGF.HaveInsertPoint())
10838 return;
10839
10840 // Action used to replace the default codegen action and turn privatization
10841 // off.
10842 PrePostActionTy NoPrivAction;
10843
10844 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10845
10846 llvm::Value *IfCondVal = nullptr;
10847 if (IfCond)
10848 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10849
10850 // Emit device ID if any.
10851 llvm::Value *DeviceID = nullptr;
10852 if (Device) {
10853 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10854 CGF.Int64Ty, /*isSigned=*/true);
10855 } else {
10856 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10857 }
10858
10859 // Fill up the arrays with all the mapped variables.
10860 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10861 auto GenMapInfoCB =
10862 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10863 CGF.Builder.restoreIP(CodeGenIP);
10864 // Get map clause information.
10865 MappableExprsHandler MEHandler(D, CGF);
10866 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10867
10868 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10869 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10870 };
10871 if (CGM.getCodeGenOpts().getDebugInfo() !=
10872 llvm::codegenoptions::NoDebugInfo) {
10873 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10874 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10875 FillInfoMap);
10876 }
10877
10878 return CombinedInfo;
10879 };
10880 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10881 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10882 CGF.Builder.restoreIP(CodeGenIP);
10883 switch (BodyGenType) {
10884 case BodyGenTy::Priv:
10885 if (!Info.CaptureDeviceAddrMap.empty())
10886 CodeGen(CGF);
10887 break;
10888 case BodyGenTy::DupNoPriv:
10889 if (!Info.CaptureDeviceAddrMap.empty()) {
10890 CodeGen.setAction(NoPrivAction);
10891 CodeGen(CGF);
10892 }
10893 break;
10894 case BodyGenTy::NoPriv:
10895 if (Info.CaptureDeviceAddrMap.empty()) {
10896 CodeGen.setAction(NoPrivAction);
10897 CodeGen(CGF);
10898 }
10899 break;
10900 }
10901 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10902 CGF.Builder.GetInsertPoint());
10903 };
10904
10905 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10906 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10907 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10908 }
10909 };
10910
10911 auto CustomMapperCB = [&](unsigned int I) {
10912 llvm::Function *MFunc = nullptr;
10913 if (CombinedInfo.Mappers[I]) {
10914 Info.HasMapper = true;
10915 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10916 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10917 }
10918 return MFunc;
10919 };
10920
10921 // Source location for the ident struct
10922 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10923
10924 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10925 CGF.AllocaInsertPt->getIterator());
10926 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10927 CGF.Builder.GetInsertPoint());
10928 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10929 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10930 cantFail(OMPBuilder.createTargetData(
10931 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10932 CustomMapperCB,
10933 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
10934 CGF.Builder.restoreIP(AfterIP);
10935}
10936
10937 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10938 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10939 const Expr *Device) {
10940 if (!CGF.HaveInsertPoint())
10941 return;
10942
10946 "Expecting either target enter, exit data, or update directives.");
10947
10948 CodeGenFunction::OMPTargetDataInfo InputInfo;
10949 llvm::Value *MapTypesArray = nullptr;
10950 llvm::Value *MapNamesArray = nullptr;
10951 // Generate the code for the opening of the data environment.
10952 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10953 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10954 // Emit device ID if any.
10955 llvm::Value *DeviceID = nullptr;
10956 if (Device) {
10957 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10958 CGF.Int64Ty, /*isSigned=*/true);
10959 } else {
10960 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10961 }
10962
10963 // Emit the number of elements in the offloading arrays.
10964 llvm::Constant *PointerNum =
10965 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10966
10967 // Source location for the ident struct
10968 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10969
10970 SmallVector<llvm::Value *, 13> OffloadingArgs(
10971 {RTLoc, DeviceID, PointerNum,
10972 InputInfo.BasePointersArray.emitRawPointer(CGF),
10973 InputInfo.PointersArray.emitRawPointer(CGF),
10974 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10975 InputInfo.MappersArray.emitRawPointer(CGF)});
10976
10977 // Select the right runtime function call for each standalone
10978 // directive.
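// For example, '#pragma omp target update to(x) nowait' selects
// __tgt_target_data_update_nowait_mapper below.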
10979 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10980 RuntimeFunction RTLFn;
10981 switch (D.getDirectiveKind()) {
10982 case OMPD_target_enter_data:
10983 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10984 : OMPRTL___tgt_target_data_begin_mapper;
10985 break;
10986 case OMPD_target_exit_data:
10987 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10988 : OMPRTL___tgt_target_data_end_mapper;
10989 break;
10990 case OMPD_target_update:
10991 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10992 : OMPRTL___tgt_target_data_update_mapper;
10993 break;
10994 case OMPD_parallel:
10995 case OMPD_for:
10996 case OMPD_parallel_for:
10997 case OMPD_parallel_master:
10998 case OMPD_parallel_sections:
10999 case OMPD_for_simd:
11000 case OMPD_parallel_for_simd:
11001 case OMPD_cancel:
11002 case OMPD_cancellation_point:
11003 case OMPD_ordered:
11004 case OMPD_threadprivate:
11005 case OMPD_allocate:
11006 case OMPD_task:
11007 case OMPD_simd:
11008 case OMPD_tile:
11009 case OMPD_unroll:
11010 case OMPD_sections:
11011 case OMPD_section:
11012 case OMPD_single:
11013 case OMPD_master:
11014 case OMPD_critical:
11015 case OMPD_taskyield:
11016 case OMPD_barrier:
11017 case OMPD_taskwait:
11018 case OMPD_taskgroup:
11019 case OMPD_atomic:
11020 case OMPD_flush:
11021 case OMPD_depobj:
11022 case OMPD_scan:
11023 case OMPD_teams:
11024 case OMPD_target_data:
11025 case OMPD_distribute:
11026 case OMPD_distribute_simd:
11027 case OMPD_distribute_parallel_for:
11028 case OMPD_distribute_parallel_for_simd:
11029 case OMPD_teams_distribute:
11030 case OMPD_teams_distribute_simd:
11031 case OMPD_teams_distribute_parallel_for:
11032 case OMPD_teams_distribute_parallel_for_simd:
11033 case OMPD_declare_simd:
11034 case OMPD_declare_variant:
11035 case OMPD_begin_declare_variant:
11036 case OMPD_end_declare_variant:
11037 case OMPD_declare_target:
11038 case OMPD_end_declare_target:
11039 case OMPD_declare_reduction:
11040 case OMPD_declare_mapper:
11041 case OMPD_taskloop:
11042 case OMPD_taskloop_simd:
11043 case OMPD_master_taskloop:
11044 case OMPD_master_taskloop_simd:
11045 case OMPD_parallel_master_taskloop:
11046 case OMPD_parallel_master_taskloop_simd:
11047 case OMPD_target:
11048 case OMPD_target_simd:
11049 case OMPD_target_teams_distribute:
11050 case OMPD_target_teams_distribute_simd:
11051 case OMPD_target_teams_distribute_parallel_for:
11052 case OMPD_target_teams_distribute_parallel_for_simd:
11053 case OMPD_target_teams:
11054 case OMPD_target_parallel:
11055 case OMPD_target_parallel_for:
11056 case OMPD_target_parallel_for_simd:
11057 case OMPD_requires:
11058 case OMPD_metadirective:
11059 case OMPD_unknown:
11060 default:
11061 llvm_unreachable("Unexpected standalone target data directive.");
11062 break;
11063 }
11064 if (HasNowait) {
11065 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11066 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11067 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11068 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11069 }
11070 CGF.EmitRuntimeCall(
11071 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11072 OffloadingArgs);
11073 };
11074
11075 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11076 &MapNamesArray](CodeGenFunction &CGF,
11077 PrePostActionTy &) {
11078 // Fill up the arrays with all the mapped variables.
11079 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11080 CGOpenMPRuntime::TargetDataInfo Info;
11081 MappableExprsHandler MEHandler(D, CGF);
11082 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11083 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11084 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11085
11086 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11087 D.hasClausesOfKind<OMPNowaitClause>();
11088
11089 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11090 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11091 CGF.VoidPtrTy, CGM.getPointerAlign());
11092 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11093 CGM.getPointerAlign());
11094 InputInfo.SizesArray =
11095 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11096 InputInfo.MappersArray =
11097 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11098 MapTypesArray = Info.RTArgs.MapTypesArray;
11099 MapNamesArray = Info.RTArgs.MapNamesArray;
11100 if (RequiresOuterTask)
11101 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11102 else
11103 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11104 };
11105
11106 if (IfCond) {
11107 emitIfClause(CGF, IfCond, TargetThenGen,
11108 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11109 } else {
11110 RegionCodeGenTy ThenRCG(TargetThenGen);
11111 ThenRCG(CGF);
11112 }
11113}
11114
11115namespace {
11116 /// Kind of parameter in a function with 'declare simd' directive.
11117enum ParamKindTy {
11118 Linear,
11119 LinearRef,
11120 LinearUVal,
11121 LinearVal,
11122 Uniform,
11123 Vector,
11124};
11125/// Attribute set of the parameter.
11126struct ParamAttrTy {
11127 ParamKindTy Kind = Vector;
11128 llvm::APSInt StrideOrArg;
11129 llvm::APSInt Alignment;
11130 bool HasVarStride = false;
11131};
11132} // namespace
11133
11134static unsigned evaluateCDTSize(const FunctionDecl *FD,
11135 ArrayRef<ParamAttrTy> ParamAttrs) {
11136 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11137 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11138 // of that clause. The VLEN value must be a power of 2.
11139 // Otherwise, the notion of the function's "characteristic data type" (CDT)
11140 // is used to compute the vector length.
11141 // CDT is defined in the following order:
11142 // a) For non-void function, the CDT is the return type.
11143 // b) If the function has any non-uniform, non-linear parameters, then the
11144 // CDT is the type of the first such parameter.
11145 // c) If the CDT determined by a) or b) above is struct, union, or class
11146 // type which is pass-by-value (except for the type that maps to the
11147 // built-in complex data type), the characteristic data type is int.
11148 // d) If none of the above three cases is applicable, the CDT is int.
11149 // The VLEN is then determined based on the CDT and the size of vector
11150 // register of that ISA for which current vector version is generated. The
11151 // VLEN is computed using the formula below:
11152 // VLEN = sizeof(vector_register) / sizeof(CDT),
11153 // where the vector register size is specified in section 3.2.1 "Registers and
11154 // the Stack Frame" of the original AMD64 ABI document.
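// For example, for 'double foo(double x)' the CDT is double (64 bits), so an
// ISA with 128-bit vector registers yields VLEN = 128 / 64 = 2.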
11155 QualType RetType = FD->getReturnType();
11156 if (RetType.isNull())
11157 return 0;
11158 ASTContext &C = FD->getASTContext();
11159 QualType CDT;
11160 if (!RetType.isNull() && !RetType->isVoidType()) {
11161 CDT = RetType;
11162 } else {
11163 unsigned Offset = 0;
11164 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11165 if (ParamAttrs[Offset].Kind == Vector)
11166 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
11167 ++Offset;
11168 }
11169 if (CDT.isNull()) {
11170 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11171 if (ParamAttrs[I + Offset].Kind == Vector) {
11172 CDT = FD->getParamDecl(I)->getType();
11173 break;
11174 }
11175 }
11176 }
11177 }
11178 if (CDT.isNull())
11179 CDT = C.IntTy;
11180 CDT = CDT->getCanonicalTypeUnqualified();
11181 if (CDT->isRecordType() || CDT->isUnionType())
11182 CDT = C.IntTy;
11183 return C.getTypeSize(CDT);
11184}
11185
11186/// Mangle the parameter part of the vector function name according to
11187/// their OpenMP classification. The mangling function is defined in
11188/// section 4.5 of the AAVFABI(2021Q1).
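/// For example, 'void foo(int *p, int n)' with 'linear(p) uniform(n)' mangles
/// to "l4u": 'l' for the linear pointer whose unit stride was rescaled by
/// sizeof(int) to 4, then 'u' for the uniform parameter.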
11189static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11190 SmallString<256> Buffer;
11191 llvm::raw_svector_ostream Out(Buffer);
11192 for (const auto &ParamAttr : ParamAttrs) {
11193 switch (ParamAttr.Kind) {
11194 case Linear:
11195 Out << 'l';
11196 break;
11197 case LinearRef:
11198 Out << 'R';
11199 break;
11200 case LinearUVal:
11201 Out << 'U';
11202 break;
11203 case LinearVal:
11204 Out << 'L';
11205 break;
11206 case Uniform:
11207 Out << 'u';
11208 break;
11209 case Vector:
11210 Out << 'v';
11211 break;
11212 }
11213 if (ParamAttr.HasVarStride)
11214 Out << "s" << ParamAttr.StrideOrArg;
11215 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11216 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11217 // Don't print the step value if it is not present or if it is
11218 // equal to 1.
11219 if (ParamAttr.StrideOrArg < 0)
11220 Out << 'n' << -ParamAttr.StrideOrArg;
11221 else if (ParamAttr.StrideOrArg != 1)
11222 Out << ParamAttr.StrideOrArg;
11223 }
11224
11225 if (!!ParamAttr.Alignment)
11226 Out << 'a' << ParamAttr.Alignment;
11227 }
11228
11229 return std::string(Out.str());
11230}
11231
11232static void
11233emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11234 const llvm::APSInt &VLENVal,
11235 ArrayRef<ParamAttrTy> ParamAttrs,
11236 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11237 struct ISADataTy {
11238 char ISA;
11239 unsigned VecRegSize;
11240 };
11241 ISADataTy ISAData[] = {
11242 {
11243 'b', 128
11244 }, // SSE
11245 {
11246 'c', 256
11247 }, // AVX
11248 {
11249 'd', 256
11250 }, // AVX2
11251 {
11252 'e', 512
11253 }, // AVX512
11254 };
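// Each (mask, ISA) pair below yields one "_ZGV<ISA><Mask><VLEN><params>_<name>"
// attribute, e.g. "_ZGVbN2v_foo" for a not-inbranch SSE variant of
// 'double foo(double)' (VLEN = 128 / 64 = 2, 'v' for the vector parameter).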
11255 llvm::SmallVector<char, 2> Masked;
11256 switch (State) {
11257 case OMPDeclareSimdDeclAttr::BS_Undefined:
11258 Masked.push_back('N');
11259 Masked.push_back('M');
11260 break;
11261 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11262 Masked.push_back('N');
11263 break;
11264 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11265 Masked.push_back('M');
11266 break;
11267 }
11268 for (char Mask : Masked) {
11269 for (const ISADataTy &Data : ISAData) {
11270 SmallString<256> Buffer;
11271 llvm::raw_svector_ostream Out(Buffer);
11272 Out << "_ZGV" << Data.ISA << Mask;
11273 if (!VLENVal) {
11274 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11275 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11276 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11277 } else {
11278 Out << VLENVal;
11279 }
11280 Out << mangleVectorParameters(ParamAttrs);
11281 Out << '_' << Fn->getName();
11282 Fn->addFnAttr(Out.str());
11283 }
11284 }
11285}
11286
11287 // These are the functions needed to mangle the names of the
11288 // vector functions generated by the compiler, according to the rules
11289// defined in the "Vector Function ABI specifications for AArch64",
11290// available at
11291// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11292
11293/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11294static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11295 QT = QT.getCanonicalType();
11296
11297 if (QT->isVoidType())
11298 return false;
11299
11300 if (Kind == ParamKindTy::Uniform)
11301 return false;
11302
11303 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
11304 return false;
11305
11306 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11307 !QT->isReferenceType())
11308 return false;
11309
11310 return true;
11311}
11312
11313/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11314 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11315 QT = QT.getCanonicalType();
11316 unsigned Size = C.getTypeSize(QT);
11317
11318 // Only scalar and complex types at most 16 bytes wide set PBV to true.
11319 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11320 return false;
11321
11322 if (QT->isFloatingType())
11323 return true;
11324
11325 if (QT->isIntegerType())
11326 return true;
11327
11328 if (QT->isPointerType())
11329 return true;
11330
11331 // TODO: Add support for complex types (section 3.1.2, item 2).
11332
11333 return false;
11334}
11335
11336/// Computes the lane size (LS) of a return type or of an input parameter,
11337/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11338/// TODO: Add support for references, section 3.2.1, item 1.
11339static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11340 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11341 QualType PTy = QT.getCanonicalType()->getPointeeType();
11342 if (getAArch64PBV(PTy, C))
11343 return C.getTypeSize(PTy);
11344 }
11345 if (getAArch64PBV(QT, C))
11346 return C.getTypeSize(QT);
11347
11348 return C.getTypeSize(C.getUIntPtrType());
11349}
11350
11351// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11352// signature of the scalar function, as defined in 3.2.2 of the
11353// AAVFABI.
11354 static std::tuple<unsigned, unsigned, bool>
11355 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11356 QualType RetType = FD->getReturnType().getCanonicalType();
11357
11358 ASTContext &C = FD->getASTContext();
11359
11360 bool OutputBecomesInput = false;
11361
11362 llvm::SmallVector<unsigned, 8> Sizes;
11363 if (!RetType->isVoidType()) {
11364 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11365 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11366 OutputBecomesInput = true;
11367 }
11368 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11369 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11370 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11371 }
11372
11373 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11374 // The LS of a function parameter / return value can only be a power
11375 // of 2, starting from 8 bits, up to 128.
11376 assert(llvm::all_of(Sizes,
11377 [](unsigned Size) {
11378 return Size == 8 || Size == 16 || Size == 32 ||
11379 Size == 64 || Size == 128;
11380 }) &&
11381 "Invalid size");
11382
11383 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
11384 OutputBecomesInput);
11385}
11386
11387// Function used to add the attribute. The parameter `VLEN` is
11388// templated to allow the use of "x" when targeting scalable functions
11389// for SVE.
11390template <typename T>
11391static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11392 char ISA, StringRef ParSeq,
11393 StringRef MangledName, bool OutputBecomesInput,
11394 llvm::Function *Fn) {
11395 SmallString<256> Buffer;
11396 llvm::raw_svector_ostream Out(Buffer);
11397 Out << Prefix << ISA << LMask << VLEN;
11398 if (OutputBecomesInput)
11399 Out << "v";
11400 Out << ParSeq << "_" << MangledName;
11401 Fn->addFnAttr(Out.str());
11402}
11403
11404// Helper function to generate the Advanced SIMD names depending on
11405// the value of the NDS when simdlen is not present.
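// For example, an NDS of 32 bits emits both a 2-lane (64-bit) and a 4-lane
// (128-bit) variant, covering the two Advanced SIMD register widths.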
11406static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11407 StringRef Prefix, char ISA,
11408 StringRef ParSeq, StringRef MangledName,
11409 bool OutputBecomesInput,
11410 llvm::Function *Fn) {
11411 switch (NDS) {
11412 case 8:
11413 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11414 OutputBecomesInput, Fn);
11415 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11416 OutputBecomesInput, Fn);
11417 break;
11418 case 16:
11419 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11420 OutputBecomesInput, Fn);
11421 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11422 OutputBecomesInput, Fn);
11423 break;
11424 case 32:
11425 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11426 OutputBecomesInput, Fn);
11427 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11428 OutputBecomesInput, Fn);
11429 break;
11430 case 64:
11431 case 128:
11432 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11433 OutputBecomesInput, Fn);
11434 break;
11435 default:
11436 llvm_unreachable("Scalar type is too wide.");
11437 }
11438}
11439
11440/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
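/// For example, an Advanced SIMD (ISA 'n') 4-lane variant of 'foo' is
/// advertised as "_ZGVnN4<params>_foo", while the SVE path (ISA 's') emits a
/// masked, scalable "_ZGVsMx<params>_foo" variant.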
11441 static void emitAArch64DeclareSimdFunction(
11442 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11443 ArrayRef<ParamAttrTy> ParamAttrs,
11444 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11445 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11446
11447 // Get basic data for building the vector signature.
11448 const auto Data = getNDSWDS(FD, ParamAttrs);
11449 const unsigned NDS = std::get<0>(Data);
11450 const unsigned WDS = std::get<1>(Data);
11451 const bool OutputBecomesInput = std::get<2>(Data);
11452
11453 // Check the values provided via `simdlen` by the user.
11454 // 1. A `simdlen(1)` doesn't produce vector signatures.
11455 if (UserVLEN == 1) {
11456 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11457 DiagnosticsEngine::Warning,
11458 "The clause simdlen(1) has no effect when targeting aarch64.");
11459 CGM.getDiags().Report(SLoc, DiagID);
11460 return;
11461 }
11462
11463 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11464 // Advanced SIMD output.
11465 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11466 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11467 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11468 "power of 2 when targeting Advanced SIMD.");
11469 CGM.getDiags().Report(SLoc, DiagID);
11470 return;
11471 }
11472
11473 // 3. Section 3.4.1. SVE fixed-length vectors must obey the architectural
11474 // limits.
11475 if (ISA == 's' && UserVLEN != 0) {
11476 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11477 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11478 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11479 "lanes in the architectural constraints "
11480 "for SVE (min is 128-bit, max is "
11481 "2048-bit, by steps of 128-bit)");
11482 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11483 return;
11484 }
11485 }
11486
11487 // Sort out parameter sequence.
11488 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11489 StringRef Prefix = "_ZGV";
11490 // Generate simdlen from user input (if any).
11491 if (UserVLEN) {
11492 if (ISA == 's') {
11493 // SVE generates only a masked function.
11494 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11495 OutputBecomesInput, Fn);
11496 } else {
11497 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11498 // Advanced SIMD generates one or two functions, depending on
11499 // the `[not]inbranch` clause.
11500 switch (State) {
11501 case OMPDeclareSimdDeclAttr::BS_Undefined:
11502 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11503 OutputBecomesInput, Fn);
11504 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11505 OutputBecomesInput, Fn);
11506 break;
11507 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11508 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11509 OutputBecomesInput, Fn);
11510 break;
11511 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11512 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11513 OutputBecomesInput, Fn);
11514 break;
11515 }
11516 }
11517 } else {
11518 // If no user simdlen is provided, follow the AAVFABI rules for
11519 // generating the vector length.
11520 if (ISA == 's') {
11521 // SVE, section 3.4.1, item 1.
11522 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11523 OutputBecomesInput, Fn);
11524 } else {
11525 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11526 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11527 // two vector names depending on the use of the clause
11528 // `[not]inbranch`.
11529 switch (State) {
11530 case OMPDeclareSimdDeclAttr::BS_Undefined:
11531 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11532 OutputBecomesInput, Fn);
11533 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11534 OutputBecomesInput, Fn);
11535 break;
11536 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11537 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11538 OutputBecomesInput, Fn);
11539 break;
11540 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11541 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11542 OutputBecomesInput, Fn);
11543 break;
11544 }
11545 }
11546 }
11547}
11548
11549 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11550 llvm::Function *Fn) {
11551 ASTContext &C = CGM.getContext();
11552 FD = FD->getMostRecentDecl();
11553 while (FD) {
11554 // Map params to their positions in function decl.
11555 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11556 if (isa<CXXMethodDecl>(FD))
11557 ParamPositions.try_emplace(FD, 0);
11558 unsigned ParamPos = ParamPositions.size();
11559 for (const ParmVarDecl *P : FD->parameters()) {
11560 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11561 ++ParamPos;
11562 }
11563 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11564 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11565 // Mark uniform parameters.
11566 for (const Expr *E : Attr->uniforms()) {
11567 E = E->IgnoreParenImpCasts();
11568 unsigned Pos;
11569 if (isa<CXXThisExpr>(E)) {
11570 Pos = ParamPositions[FD];
11571 } else {
11572 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11573 ->getCanonicalDecl();
11574 auto It = ParamPositions.find(PVD);
11575 assert(It != ParamPositions.end() && "Function parameter not found");
11576 Pos = It->second;
11577 }
11578 ParamAttrs[Pos].Kind = Uniform;
11579 }
11580 // Get alignment info.
11581 auto *NI = Attr->alignments_begin();
11582 for (const Expr *E : Attr->aligneds()) {
11583 E = E->IgnoreParenImpCasts();
11584 unsigned Pos;
11585 QualType ParmTy;
11586 if (isa<CXXThisExpr>(E)) {
11587 Pos = ParamPositions[FD];
11588 ParmTy = E->getType();
11589 } else {
11590 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11591 ->getCanonicalDecl();
11592 auto It = ParamPositions.find(PVD);
11593 assert(It != ParamPositions.end() && "Function parameter not found");
11594 Pos = It->second;
11595 ParmTy = PVD->getType();
11596 }
11597 ParamAttrs[Pos].Alignment =
11598 (*NI)
11599 ? (*NI)->EvaluateKnownConstInt(C)
11600 : llvm::APSInt::getUnsigned(
11601 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11602 .getQuantity());
11603 ++NI;
11604 }
11605 // Mark linear parameters.
11606 auto *SI = Attr->steps_begin();
11607 auto *MI = Attr->modifiers_begin();
11608 for (const Expr *E : Attr->linears()) {
11609 E = E->IgnoreParenImpCasts();
11610 unsigned Pos;
11611 bool IsReferenceType = false;
11612 // Rescaling factor needed to compute the linear parameter
11613 // value in the mangled name.
11614 unsigned PtrRescalingFactor = 1;
11615 if (isa<CXXThisExpr>(E)) {
11616 Pos = ParamPositions[FD];
11617 auto *P = cast<PointerType>(E->getType());
11618 PtrRescalingFactor = CGM.getContext()
11619 .getTypeSizeInChars(P->getPointeeType())
11620 .getQuantity();
11621 } else {
11622 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11623 ->getCanonicalDecl();
11624 auto It = ParamPositions.find(PVD);
11625 assert(It != ParamPositions.end() && "Function parameter not found");
11626 Pos = It->second;
11627 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11628 PtrRescalingFactor = CGM.getContext()
11629 .getTypeSizeInChars(P->getPointeeType())
11630 .getQuantity();
11631 else if (PVD->getType()->isReferenceType()) {
11632 IsReferenceType = true;
11633 PtrRescalingFactor =
11634 CGM.getContext()
11635 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11636 .getQuantity();
11637 }
11638 }
11639 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11640 if (*MI == OMPC_LINEAR_ref)
11641 ParamAttr.Kind = LinearRef;
11642 else if (*MI == OMPC_LINEAR_uval)
11643 ParamAttr.Kind = LinearUVal;
11644 else if (IsReferenceType)
11645 ParamAttr.Kind = LinearVal;
11646 else
11647 ParamAttr.Kind = Linear;
11648 // Assume a stride of 1 for `linear` without modifiers.
11649 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11650 if (*SI) {
11651 Expr::EvalResult Result;
11652 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11653 if (const auto *DRE =
11654 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11655 if (const auto *StridePVD =
11656 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11657 ParamAttr.HasVarStride = true;
11658 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11659 assert(It != ParamPositions.end() &&
11660 "Function parameter not found");
11661 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11662 }
11663 }
11664 } else {
11665 ParamAttr.StrideOrArg = Result.Val.getInt();
11666 }
11667 }
11668 // If we are using a linear clause on a pointer, we need to
11669 // rescale the value of linear_step with the byte size of the
11670 // pointee type.
11671 if (!ParamAttr.HasVarStride &&
11672 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11673 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11674 ++SI;
11675 ++MI;
11676 }
11677 llvm::APSInt VLENVal;
11678 SourceLocation ExprLoc;
11679 const Expr *VLENExpr = Attr->getSimdlen();
11680 if (VLENExpr) {
11681 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11682 ExprLoc = VLENExpr->getExprLoc();
11683 }
11684 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11685 if (CGM.getTriple().isX86()) {
11686 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11687 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11688 unsigned VLEN = VLENVal.getExtValue();
11689 StringRef MangledName = Fn->getName();
11690 if (CGM.getTarget().hasFeature("sve"))
11691 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11692 MangledName, 's', 128, Fn, ExprLoc);
11693 else if (CGM.getTarget().hasFeature("neon"))
11694 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11695 MangledName, 'n', 128, Fn, ExprLoc);
11696 }
11697 }
11698 FD = FD->getPreviousDecl();
11699 }
11700}
11701
11702namespace {
11703/// Cleanup action for doacross support.
11704class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11705public:
11706 static const int DoacrossFinArgs = 2;
11707
11708private:
11709 llvm::FunctionCallee RTLFn;
11710 llvm::Value *Args[DoacrossFinArgs];
11711
11712public:
11713 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11714 ArrayRef<llvm::Value *> CallArgs)
11715 : RTLFn(RTLFn) {
11716 assert(CallArgs.size() == DoacrossFinArgs);
11717 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11718 }
11719 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11720 if (!CGF.HaveInsertPoint())
11721 return;
11722 CGF.EmitRuntimeCall(RTLFn, Args);
11723 }
11724};
11725} // namespace
11726
11727 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11728 const OMPLoopDirective &D,
11729 ArrayRef<Expr *> NumIterations) {
11730 if (!CGF.HaveInsertPoint())
11731 return;
11732
11733 ASTContext &C = CGM.getContext();
11734 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11735 RecordDecl *RD;
11736 if (KmpDimTy.isNull()) {
11737 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11738 // kmp_int64 lo; // lower
11739 // kmp_int64 up; // upper
11740 // kmp_int64 st; // stride
11741 // };
11742 RD = C.buildImplicitRecord("kmp_dim");
11743 RD->startDefinition();
11744 addFieldToRecordDecl(C, RD, Int64Ty);
11745 addFieldToRecordDecl(C, RD, Int64Ty);
11746 addFieldToRecordDecl(C, RD, Int64Ty);
11747 RD->completeDefinition();
11748 KmpDimTy = C.getCanonicalTagType(RD);
11749 } else {
11750 RD = KmpDimTy->castAsRecordDecl();
11751 }
11752 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11753 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11754 ArraySizeModifier::Normal, 0);
11755 
11756 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11757 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11758 enum { LowerFD = 0, UpperFD, StrideFD };
11759 // Fill dims with data.
11760 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11761 LValue DimsLVal = CGF.MakeAddrLValue(
11762 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11763 // dims.upper = num_iterations;
11764 LValue UpperLVal = CGF.EmitLValueForField(
11765 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11766 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11767 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11768 Int64Ty, NumIterations[I]->getExprLoc());
11769 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11770 // dims.stride = 1;
11771 LValue StrideLVal = CGF.EmitLValueForField(
11772 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11773 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11774 StrideLVal);
11775 }
11776
11777 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11778 // kmp_int32 num_dims, struct kmp_dim * dims);
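// For example, '#pragma omp for ordered(2)' initializes two kmp_dim entries,
// one per loop in the doacross nest, with 'up' set to that loop's iteration
// count and 'st' set to 1.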
11779 llvm::Value *Args[] = {
11780 emitUpdateLocation(CGF, D.getBeginLoc()),
11781 getThreadID(CGF, D.getBeginLoc()),
11782 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11783 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11784 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11785 CGM.VoidPtrTy)};
11786
11787 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11788 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11789 CGF.EmitRuntimeCall(RTLFn, Args);
11790 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11791 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11792 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11793 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11794 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11795 llvm::ArrayRef(FiniArgs));
11796}
11797
11798template <typename T>
11799 static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11800 const T *C, llvm::Value *ULoc,
11801 llvm::Value *ThreadID) {
11802 QualType Int64Ty =
11803 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11804 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11805 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11806 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11807 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11808 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11809 const Expr *CounterVal = C->getLoopData(I);
11810 assert(CounterVal);
11811 llvm::Value *CntVal = CGF.EmitScalarConversion(
11812 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11813 CounterVal->getExprLoc());
11814 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11815 /*Volatile=*/false, Int64Ty);
11816 }
11817 llvm::Value *Args[] = {
11818 ULoc, ThreadID,
11819 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11820 llvm::FunctionCallee RTLFn;
11821 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11822 OMPDoacrossKind<T> ODK;
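// A 'source' dependence posts completion of the current iteration via
// __kmpc_doacross_post; a 'sink' dependence waits on the named iteration via
// __kmpc_doacross_wait.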
11823 if (ODK.isSource(C)) {
11824 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11825 OMPRTL___kmpc_doacross_post);
11826 } else {
11827 assert(ODK.isSink(C) && "Expect sink modifier.");
11828 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11829 OMPRTL___kmpc_doacross_wait);
11830 }
11831 CGF.EmitRuntimeCall(RTLFn, Args);
11832}
11833
11834 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11835 const OMPDependClause *C) {
11836 EmitDoacrossOrdered<OMPDependClause>(
11837 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11838 getThreadID(CGF, C->getBeginLoc()));
11839}
11840
11841 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11842 const OMPDoacrossClause *C) {
11843 EmitDoacrossOrdered<OMPDoacrossClause>(
11844 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11845 getThreadID(CGF, C->getBeginLoc()));
11846}
11847
11848 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11849 llvm::FunctionCallee Callee,
11850 ArrayRef<llvm::Value *> Args) const {
11851 assert(Loc.isValid() && "Outlined function call location must be valid.");
11852 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11853 
11854 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11855 if (Fn->doesNotThrow()) {
11856 CGF.EmitNounwindRuntimeCall(Fn, Args);
11857 return;
11858 }
11859 }
11860 CGF.EmitRuntimeCall(Callee, Args);
11861}
11862
11863 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11864 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11865 ArrayRef<llvm::Value *> Args) const {
11866 emitCall(CGF, Loc, OutlinedFn, Args);
11867}
11868
11869 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11870 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11871 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11872 HasEmittedDeclareTargetRegion = true;
11873 }
11874
11875 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11876 const VarDecl *NativeParam,
11877 const VarDecl *TargetParam) const {
11878 return CGF.GetAddrOfLocalVar(NativeParam);
11879}
11880
11881/// Return allocator value from expression, or return a null allocator (default
11882/// when no allocator specified).
11883static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11884 const Expr *Allocator) {
11885 llvm::Value *AllocVal;
11886 if (Allocator) {
11887 AllocVal = CGF.EmitScalarExpr(Allocator);
11888 // According to the standard, the original allocator type is an enum
11889 // (integer). Convert to pointer type, if required.
11890 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11891 CGF.getContext().VoidPtrTy,
11892 Allocator->getExprLoc());
11893 } else {
11894 // If no allocator specified, it defaults to the null allocator.
11895 AllocVal = llvm::Constant::getNullValue(
11896 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11897 }
11898 return AllocVal;
11899}
11900
11901/// Return the alignment from an allocate directive if present.
11902static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11903 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11904
11905 if (!AllocateAlignment)
11906 return nullptr;
11907
11908 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11909}
11910
11911Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11912 const VarDecl *VD) {
11913 if (!VD)
11914 return Address::invalid();
11915 Address UntiedAddr = Address::invalid();
11916 Address UntiedRealAddr = Address::invalid();
11917 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11918 if (It != FunctionToUntiedTaskStackMap.end()) {
11919 const UntiedLocalVarsAddressesMap &UntiedData =
11920 UntiedLocalVarsStack[It->second];
11921 auto I = UntiedData.find(VD);
11922 if (I != UntiedData.end()) {
11923 UntiedAddr = I->second.first;
11924 UntiedRealAddr = I->second.second;
11925 }
11926 }
11927 const VarDecl *CVD = VD->getCanonicalDecl();
11928 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11929 // Use the default allocation.
11930 if (!isAllocatableDecl(VD))
11931 return UntiedAddr;
11932 llvm::Value *Size;
11933 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11934 if (CVD->getType()->isVariablyModifiedType()) {
11935 Size = CGF.getTypeSize(CVD->getType());
11936 // Align the size: ((size + align - 1) / align) * align
11937 Size = CGF.Builder.CreateNUWAdd(
11938 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11939 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11940 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11941 } else {
11942 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11943 Size = CGM.getSize(Sz.alignTo(Align));
11944 }
11945 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11946 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11947 const Expr *Allocator = AA->getAllocator();
11948 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11949 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11950 SmallVector<llvm::Value *, 4> Args;
11951 Args.push_back(ThreadID);
11952 if (Alignment)
11953 Args.push_back(Alignment);
11954 Args.push_back(Size);
11955 Args.push_back(AllocVal);
11956 llvm::omp::RuntimeFunction FnID =
11957 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11958 llvm::Value *Addr = CGF.EmitRuntimeCall(
11959 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11960 getName({CVD->getName(), ".void.addr"}));
11961 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11962 CGM.getModule(), OMPRTL___kmpc_free);
11963 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11964 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11965 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11966 if (UntiedAddr.isValid())
11967 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11968
11969 // Cleanup action for allocate support.
11970 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11971 llvm::FunctionCallee RTLFn;
11972 SourceLocation::UIntTy LocEncoding;
11973 Address Addr;
11974 const Expr *AllocExpr;
11975
11976 public:
11977 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11978 SourceLocation::UIntTy LocEncoding, Address Addr,
11979 const Expr *AllocExpr)
11980 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11981 AllocExpr(AllocExpr) {}
11982 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11983 if (!CGF.HaveInsertPoint())
11984 return;
11985 llvm::Value *Args[3];
11986 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11987 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11988 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11989 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11990 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11991 Args[2] = AllocVal;
11992 CGF.EmitRuntimeCall(RTLFn, Args);
11993 }
11994 };
11995 Address VDAddr =
11996 UntiedRealAddr.isValid()
11997 ? UntiedRealAddr
11998 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11999 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12000 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12001 VDAddr, Allocator);
12002 if (UntiedRealAddr.isValid())
12003 if (auto *Region =
12004 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12005 Region->emitUntiedSwitch(CGF);
12006 return VDAddr;
12007 }
12008 return UntiedAddr;
12009}
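Putting the pieces of getAddressOfLocalVariable together: a local covered by an allocate directive is heap-allocated through the OpenMP allocator API and released by the pushed cleanup, and for variably modified types the byte size is first rounded up to the declared alignment, e.g. size 10 with align 8 becomes ((10 + 8 - 1) / 8) * 8 = 16. A rough source-to-runtime sketch (allocator and values invented for illustration):

// double x;
// #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc) align(16)
// lowers approximately to:
//   void *x.void.addr = __kmpc_aligned_alloc(tid, 16, sizeof(double), allocator);
//   ... uses of x go through the casted x.addr ...
//   __kmpc_free(tid, x.void.addr, allocator);  // NormalAndEHCleanup, as pushed above
// Without an align clause, __kmpc_alloc(tid, size, allocator) is called instead.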
12010
12011bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12012 const VarDecl *VD) const {
12013 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12014 if (It == FunctionToUntiedTaskStackMap.end())
12015 return false;
12016 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12017}
12018
12019CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12020 CodeGenModule &CGM, const OMPLoopDirective &S)
12021 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12022 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12023 if (!NeedToPush)
12024 return;
12025 NontemporalDeclsSet &DS =
12026 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12027 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12028 for (const Stmt *Ref : C->private_refs()) {
12029 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12030 const ValueDecl *VD;
12031 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12032 VD = DRE->getDecl();
12033 } else {
12034 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12035 assert((ME->isImplicitCXXThis() ||
12036 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12037 "Expected member of current class.");
12038 VD = ME->getMemberDecl();
12039 }
12040 DS.insert(VD);
12041 }
12042 }
12043}
12044
12045CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12046 if (!NeedToPush)
12047 return;
12048 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12049}
12050
12051CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12052 CodeGenFunction &CGF,
12053 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12054 std::pair<Address, Address>> &LocalVars)
12055 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12056 if (!NeedToPush)
12057 return;
12058 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12059 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12060 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12061}
12062
12063CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12064 if (!NeedToPush)
12065 return;
12066 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12067}
12068
12069bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12070 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12071
12072 return llvm::any_of(
12073 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12074 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12075}
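The stack queried here is populated by NontemporalDeclsRAII for each loop directive carrying a nontemporal clause; codegen consults isNontemporalDecl when emitting the variable's accesses so they can be given a non-temporal hint. A minimal sketch of a triggering construct (names illustrative):

#pragma omp simd nontemporal(a)    // 'a' lands in NontemporalDeclsStack for this loop
for (int i = 0; i < n; ++i)
  a[i] = b[i] + 1;                 // accesses to 'a' are marked non-temporal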
12076
12077void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12078 const OMPExecutableDirective &S,
12079 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12080 const {
12081 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12082 // Vars in target/task regions must be excluded completely.
12083 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12084 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12085 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12086 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12087 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12088 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12089 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12090 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12091 }
12092 }
12093 // Exclude vars in private clauses.
12094 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12095 for (const Expr *Ref : C->varlist()) {
12096 if (!Ref->getType()->isScalarType())
12097 continue;
12098 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12099 if (!DRE)
12100 continue;
12101 NeedToCheckForLPCs.insert(DRE->getDecl());
12102 }
12103 }
12104 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12105 for (const Expr *Ref : C->varlist()) {
12106 if (!Ref->getType()->isScalarType())
12107 continue;
12108 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12109 if (!DRE)
12110 continue;
12111 NeedToCheckForLPCs.insert(DRE->getDecl());
12112 }
12113 }
12114 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12115 for (const Expr *Ref : C->varlist()) {
12116 if (!Ref->getType()->isScalarType())
12117 continue;
12118 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12119 if (!DRE)
12120 continue;
12121 NeedToCheckForLPCs.insert(DRE->getDecl());
12122 }
12123 }
12124 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12125 for (const Expr *Ref : C->varlist()) {
12126 if (!Ref->getType()->isScalarType())
12127 continue;
12128 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12129 if (!DRE)
12130 continue;
12131 NeedToCheckForLPCs.insert(DRE->getDecl());
12132 }
12133 }
12134 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12135 for (const Expr *Ref : C->varlist()) {
12136 if (!Ref->getType()->isScalarType())
12137 continue;
12138 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12139 if (!DRE)
12140 continue;
12141 NeedToCheckForLPCs.insert(DRE->getDecl());
12142 }
12143 }
12144 for (const Decl *VD : NeedToCheckForLPCs) {
12145 for (const LastprivateConditionalData &Data :
12146 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12147 if (Data.DeclToUniqueName.count(VD) > 0) {
12148 if (!Data.Disabled)
12149 NeedToAddForLPCsAsDisabled.insert(VD);
12150 break;
12151 }
12152 }
12153 }
12154}
12155
12156CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12157 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12158 : CGM(CGF.CGM),
12159 Action((CGM.getLangOpts().OpenMP >= 50 &&
12160 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12161 [](const OMPLastprivateClause *C) {
12162 return C->getKind() ==
12163 OMPC_LASTPRIVATE_conditional;
12164 }))
12165 ? ActionToDo::PushAsLastprivateConditional
12166 : ActionToDo::DoNotPush) {
12167 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12168 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12169 return;
12170 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12171 "Expected a push action.");
12172 LastprivateConditionalData &Data =
12173 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12174 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12175 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12176 continue;
12177
12178 for (const Expr *Ref : C->varlist()) {
12179 Data.DeclToUniqueName.insert(std::make_pair(
12180 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12181 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12182 }
12183 }
12184 Data.IVLVal = IVLVal;
12185 Data.Fn = CGF.CurFn;
12186}
12187
12188CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12189 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12190 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12191 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12192 if (CGM.getLangOpts().OpenMP < 50)
12193 return;
12194 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12195 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12196 if (!NeedToAddForLPCsAsDisabled.empty()) {
12197 Action = ActionToDo::DisableLastprivateConditional;
12198 LastprivateConditionalData &Data =
12199 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12200 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12201 Data.DeclToUniqueName.try_emplace(VD);
12202 Data.Fn = CGF.CurFn;
12203 Data.Disabled = true;
12204 }
12205}
12206
12207CGOpenMPRuntime::LastprivateConditionalRAII
12208CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12209 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12210 return LastprivateConditionalRAII(CGF, S);
12211}
12212
12213CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12214 if (CGM.getLangOpts().OpenMP < 50)
12215 return;
12216 if (Action == ActionToDo::DisableLastprivateConditional) {
12217 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12218 "Expected list of disabled private vars.");
12219 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12220 }
12221 if (Action == ActionToDo::PushAsLastprivateConditional) {
12222 assert(
12223 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12224 "Expected list of lastprivate conditional vars.");
12225 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12226 }
12227}
12228
12229Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12230 const VarDecl *VD) {
12231 ASTContext &C = CGM.getContext();
12232 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12233 QualType NewType;
12234 const FieldDecl *VDField;
12235 const FieldDecl *FiredField;
12236 LValue BaseLVal;
12237 auto VI = I->getSecond().find(VD);
12238 if (VI == I->getSecond().end()) {
12239 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12240 RD->startDefinition();
12241 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12242 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12243 RD->completeDefinition();
12244 NewType = C.getCanonicalTagType(RD);
12245 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12246 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12247 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12248 } else {
12249 NewType = std::get<0>(VI->getSecond());
12250 VDField = std::get<1>(VI->getSecond());
12251 FiredField = std::get<2>(VI->getSecond());
12252 BaseLVal = std::get<3>(VI->getSecond());
12253 }
12254 LValue FiredLVal =
12255 CGF.EmitLValueForField(BaseLVal, FiredField);
12256 CGF.EmitStoreOfScalar(
12257 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12258 FiredLVal);
12259 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
12260}
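So each lastprivate(conditional:) variable gets a per-function implicit record pairing the private value with a "fired" flag, which this function zero-initializes; conceptually (sketch of the layout built above):

// struct lasprivate.conditional {  // name as built by buildImplicitRecord above
//   T value;     // field of VD's non-reference type; its address is returned
//   char Fired;  // set non-zero by inner regions whenever the variable is assigned
// };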
12261
12262namespace {
12263/// Checks if the lastprivate conditional variable is referenced in LHS.
12264class LastprivateConditionalRefChecker final
12265 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12266 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12267 const Expr *FoundE = nullptr;
12268 const Decl *FoundD = nullptr;
12269 StringRef UniqueDeclName;
12270 LValue IVLVal;
12271 llvm::Function *FoundFn = nullptr;
12272 SourceLocation Loc;
12273
12274public:
12275 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12276 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12277 llvm::reverse(LPM)) {
12278 auto It = D.DeclToUniqueName.find(E->getDecl());
12279 if (It == D.DeclToUniqueName.end())
12280 continue;
12281 if (D.Disabled)
12282 return false;
12283 FoundE = E;
12284 FoundD = E->getDecl()->getCanonicalDecl();
12285 UniqueDeclName = It->second;
12286 IVLVal = D.IVLVal;
12287 FoundFn = D.Fn;
12288 break;
12289 }
12290 return FoundE == E;
12291 }
12292 bool VisitMemberExpr(const MemberExpr *E) {
12293 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12294 return false;
12295 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12296 llvm::reverse(LPM)) {
12297 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12298 if (It == D.DeclToUniqueName.end())
12299 continue;
12300 if (D.Disabled)
12301 return false;
12302 FoundE = E;
12303 FoundD = E->getMemberDecl()->getCanonicalDecl();
12304 UniqueDeclName = It->second;
12305 IVLVal = D.IVLVal;
12306 FoundFn = D.Fn;
12307 break;
12308 }
12309 return FoundE == E;
12310 }
12311 bool VisitStmt(const Stmt *S) {
12312 for (const Stmt *Child : S->children()) {
12313 if (!Child)
12314 continue;
12315 if (const auto *E = dyn_cast<Expr>(Child))
12316 if (!E->isGLValue())
12317 continue;
12318 if (Visit(Child))
12319 return true;
12320 }
12321 return false;
12322 }
12323 explicit LastprivateConditionalRefChecker(
12324 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12325 : LPM(LPM) {}
12326 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12327 getFoundData() const {
12328 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12329 }
12330};
12331} // namespace
12332
12333void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12334 LValue IVLVal,
12335 StringRef UniqueDeclName,
12336 LValue LVal,
12337 SourceLocation Loc) {
12338 // Last updated loop counter for the lastprivate conditional var.
12339 // int<xx> last_iv = 0;
12340 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12341 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12342 LLIVTy, getName({UniqueDeclName, "iv"}));
12343 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12344 IVLVal.getAlignment().getAsAlign());
12345 LValue LastIVLVal =
12346 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
12347
12348 // Last value of the lastprivate conditional.
12349 // decltype(priv_a) last_a;
12350 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12351 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12352 cast<llvm::GlobalVariable>(Last)->setAlignment(
12353 LVal.getAlignment().getAsAlign());
12354 LValue LastLVal =
12355 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12356
12357 // Global loop counter. Required to handle inner parallel-for regions.
12358 // iv
12359 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12360
12361 // #pragma omp critical(a)
12362 // if (last_iv <= iv) {
12363 // last_iv = iv;
12364 // last_a = priv_a;
12365 // }
12366 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12367 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12368 Action.Enter(CGF);
12369 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12370 // (last_iv <= iv) ? Check if the variable is updated and store new
12371 // value in global var.
12372 llvm::Value *CmpRes;
12373 if (IVLVal.getType()->isSignedIntegerType()) {
12374 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12375 } else {
12376 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12377 "Loop iteration variable must be integer.");
12378 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12379 }
12380 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12381 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12382 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12383 // {
12384 CGF.EmitBlock(ThenBB);
12385
12386 // last_iv = iv;
12387 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12388
12389 // last_a = priv_a;
12390 switch (CGF.getEvaluationKind(LVal.getType())) {
12391 case TEK_Scalar: {
12392 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12393 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12394 break;
12395 }
12396 case TEK_Complex: {
12397 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12398 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12399 break;
12400 }
12401 case TEK_Aggregate:
12402 llvm_unreachable(
12403 "Aggregates are not supported in lastprivate conditional.");
12404 }
12405 // }
12406 CGF.EmitBranch(ExitBB);
12407 // There is no need to emit line number for unconditional branch.
12408 (void)ApplyDebugLocation::CreateEmpty(CGF);
12409 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12410 };
12411
12412 if (CGM.getLangOpts().OpenMPSimd) {
12413 // Do not emit as a critical region as no parallel region could be emitted.
12414 RegionCodeGenTy ThenRCG(CodeGen);
12415 ThenRCG(CGF);
12416 } else {
12417 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12418 }
12419}
12420
12421void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12422 const Expr *LHS) {
12423 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12424 return;
12425 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12426 if (!Checker.Visit(LHS))
12427 return;
12428 const Expr *FoundE;
12429 const Decl *FoundD;
12430 StringRef UniqueDeclName;
12431 LValue IVLVal;
12432 llvm::Function *FoundFn;
12433 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12434 Checker.getFoundData();
12435 if (FoundFn != CGF.CurFn) {
12436 // Special codegen for inner parallel regions.
12437 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12438 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12439 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12440 "Lastprivate conditional is not found in outer region.");
12441 QualType StructTy = std::get<0>(It->getSecond());
12442 const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
12443 LValue PrivLVal = CGF.EmitLValue(FoundE);
12444 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12445 PrivLVal.getAddress(),
12446 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12447 CGF.ConvertTypeForMem(StructTy));
12448 LValue BaseLVal =
12449 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12450 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12451 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12452 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12453 FiredLVal, llvm::AtomicOrdering::Unordered,
12454 /*IsVolatile=*/true, /*isInit=*/false);
12455 return;
12456 }
12457
12458 // Private address of the lastprivate conditional in the current context.
12459 // priv_a
12460 LValue LVal = CGF.EmitLValue(FoundE);
12461 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12462 FoundE->getExprLoc());
12463}
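End to end, this implements the OpenMP 5.0 lastprivate(conditional:) semantics: every store to the listed variable is tracked, and after the loop the original variable receives the value from the last iteration, in sequential order, that actually assigned it. A minimal sketch (illustrative):

int x = 0;
#pragma omp parallel for lastprivate(conditional : x)
for (int i = 0; i < n; ++i)
  if (a[i] > 0)
    x = a[i];   // checkAndEmitLastprivateConditional fires for this LHS
// here x holds the value stored by the greatest i whose condition held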
12464
12465void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12466 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12467 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12468 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12469 return;
12470 auto Range = llvm::reverse(LastprivateConditionalStack);
12471 auto It = llvm::find_if(
12472 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12473 if (It == Range.end() || It->Fn != CGF.CurFn)
12474 return;
12475 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12476 assert(LPCI != LastprivateConditionalToTypes.end() &&
12477 "Lastprivates must be registered already.");
12478 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12479 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12480 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12481 for (const auto &Pair : It->DeclToUniqueName) {
12482 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12483 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12484 continue;
12485 auto I = LPCI->getSecond().find(Pair.first);
12486 assert(I != LPCI->getSecond().end() &&
12487 "Lastprivate must be rehistered already.");
12488 // bool Cmp = priv_a.Fired != 0;
12489 LValue BaseLVal = std::get<3>(I->getSecond());
12490 LValue FiredLVal =
12491 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12492 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12493 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12494 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12495 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12496 // if (Cmp) {
12497 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12498 CGF.EmitBlock(ThenBB);
12499 Address Addr = CGF.GetAddrOfLocalVar(VD);
12500 LValue LVal;
12501 if (VD->getType()->isReferenceType())
12502 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12503 AlignmentSource::Decl);
12504 else
12505 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12506 AlignmentSource::Decl);
12507 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12508 D.getBeginLoc());
12509 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12510 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
12511 // }
12512 }
12513}
12514
12515void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12516 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12517 SourceLocation Loc) {
12518 if (CGF.getLangOpts().OpenMP < 50)
12519 return;
12520 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12521 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12522 "Unknown lastprivate conditional variable.");
12523 StringRef UniqueName = It->second;
12524 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12525 // The variable was not updated in the region - exit.
12526 if (!GV)
12527 return;
12528 LValue LPLVal = CGF.MakeRawAddrLValue(
12529 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12530 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12531 CGF.EmitStoreOfScalar(Res, PrivLVal);
12532}
12533
12534llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12535 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12536 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12537 const RegionCodeGenTy &CodeGen) {
12538 llvm_unreachable("Not supported in SIMD-only mode");
12539}
12540
12541llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12542 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12543 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12544 const RegionCodeGenTy &CodeGen) {
12545 llvm_unreachable("Not supported in SIMD-only mode");
12546}
12547
12548llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12549 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12550 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12551 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12552 bool Tied, unsigned &NumberOfParts) {
12553 llvm_unreachable("Not supported in SIMD-only mode");
12554}
12555
12556void CGOpenMPSIMDRuntime::emitParallelCall(
12557 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
12558 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
12559 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
12560 OpenMPSeverityClauseKind Severity, const Expr *Message) {
12561 llvm_unreachable("Not supported in SIMD-only mode");
12562}
12563
12564void CGOpenMPSIMDRuntime::emitCriticalRegion(
12565 CodeGenFunction &CGF, StringRef CriticalName,
12566 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12567 const Expr *Hint) {
12568 llvm_unreachable("Not supported in SIMD-only mode");
12569}
12570
12571void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12572 const RegionCodeGenTy &MasterOpGen,
12573 SourceLocation Loc) {
12574 llvm_unreachable("Not supported in SIMD-only mode");
12575}
12576
12577void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12578 const RegionCodeGenTy &MasterOpGen,
12579 SourceLocation Loc,
12580 const Expr *Filter) {
12581 llvm_unreachable("Not supported in SIMD-only mode");
12582}
12583
12584void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12585 SourceLocation Loc) {
12586 llvm_unreachable("Not supported in SIMD-only mode");
12587}
12588
12589void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12590 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12591 SourceLocation Loc) {
12592 llvm_unreachable("Not supported in SIMD-only mode");
12593}
12594
12595void CGOpenMPSIMDRuntime::emitSingleRegion(
12596 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12597 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12598 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12599 ArrayRef<const Expr *> AssignmentOps) {
12600 llvm_unreachable("Not supported in SIMD-only mode");
12601}
12602
12603void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12604 const RegionCodeGenTy &OrderedOpGen,
12605 SourceLocation Loc,
12606 bool IsThreads) {
12607 llvm_unreachable("Not supported in SIMD-only mode");
12608}
12609
12610void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12611 SourceLocation Loc,
12612 OpenMPDirectiveKind Kind,
12613 bool EmitChecks,
12614 bool ForceSimpleCall) {
12615 llvm_unreachable("Not supported in SIMD-only mode");
12616}
12617
12618void CGOpenMPSIMDRuntime::emitForDispatchInit(
12619 CodeGenFunction &CGF, SourceLocation Loc,
12620 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12621 bool Ordered, const DispatchRTInput &DispatchValues) {
12622 llvm_unreachable("Not supported in SIMD-only mode");
12623}
12624
12625void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
12626 SourceLocation Loc) {
12627 llvm_unreachable("Not supported in SIMD-only mode");
12628}
12629
12630void CGOpenMPSIMDRuntime::emitForStaticInit(
12631 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12632 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12633 llvm_unreachable("Not supported in SIMD-only mode");
12634}
12635
12636void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12637 CodeGenFunction &CGF, SourceLocation Loc,
12638 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12639 llvm_unreachable("Not supported in SIMD-only mode");
12640}
12641
12642void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12643 SourceLocation Loc,
12644 unsigned IVSize,
12645 bool IVSigned) {
12646 llvm_unreachable("Not supported in SIMD-only mode");
12647}
12648
12649void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12650 SourceLocation Loc,
12651 OpenMPDirectiveKind DKind) {
12652 llvm_unreachable("Not supported in SIMD-only mode");
12653}
12654
12655llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12656 SourceLocation Loc,
12657 unsigned IVSize, bool IVSigned,
12658 Address IL, Address LB,
12659 Address UB, Address ST) {
12660 llvm_unreachable("Not supported in SIMD-only mode");
12661}
12662
12663void CGOpenMPSIMDRuntime::emitNumThreadsClause(
12664 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
12665 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
12666 SourceLocation SeverityLoc, const Expr *Message,
12667 SourceLocation MessageLoc) {
12668 llvm_unreachable("Not supported in SIMD-only mode");
12669}
12670
12671void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12672 ProcBindKind ProcBind,
12673 SourceLocation Loc) {
12674 llvm_unreachable("Not supported in SIMD-only mode");
12675}
12676
12677Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12678 const VarDecl *VD,
12679 Address VDAddr,
12680 SourceLocation Loc) {
12681 llvm_unreachable("Not supported in SIMD-only mode");
12682}
12683
12684llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12685 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12686 CodeGenFunction *CGF) {
12687 llvm_unreachable("Not supported in SIMD-only mode");
12688}
12689
12690Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12691 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12692 llvm_unreachable("Not supported in SIMD-only mode");
12693}
12694
12695void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12696 ArrayRef<const Expr *> Vars,
12697 SourceLocation Loc,
12698 llvm::AtomicOrdering AO) {
12699 llvm_unreachable("Not supported in SIMD-only mode");
12700}
12701
12702void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12703 const OMPExecutableDirective &D,
12704 llvm::Function *TaskFunction,
12705 QualType SharedsTy, Address Shareds,
12706 const Expr *IfCond,
12707 const OMPTaskDataTy &Data) {
12708 llvm_unreachable("Not supported in SIMD-only mode");
12709}
12710
12711void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12712 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12713 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12714 const Expr *IfCond, const OMPTaskDataTy &Data) {
12715 llvm_unreachable("Not supported in SIMD-only mode");
12716}
12717
12718void CGOpenMPSIMDRuntime::emitReduction(
12719 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12720 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12721 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12722 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12723 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12724 ReductionOps, Options);
12725}
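CGOpenMPSIMDRuntime backs -fopenmp-simd compilation, where only simd semantics are honored and no libomp calls may be emitted, hence the unreachable stubs throughout this section; emitReduction is the one entry point with real work, delegating the "simple" (no-runtime) reduction path to the base class. A sketch of a construct that reaches it (illustrative):

float sum = 0.f;
#pragma omp simd reduction(+ : sum)   // simple reduction: combined inline, no runtime calls
for (int i = 0; i < n; ++i)
  sum += a[i];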
12726
12727llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12728 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12729 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12730 llvm_unreachable("Not supported in SIMD-only mode");
12731}
12732
12733void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12734 SourceLocation Loc,
12735 bool IsWorksharingReduction) {
12736 llvm_unreachable("Not supported in SIMD-only mode");
12737}
12738
12739void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12740 SourceLocation Loc,
12741 ReductionCodeGen &RCG,
12742 unsigned N) {
12743 llvm_unreachable("Not supported in SIMD-only mode");
12744}
12745
12746Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12747 SourceLocation Loc,
12748 llvm::Value *ReductionsPtr,
12749 LValue SharedLVal) {
12750 llvm_unreachable("Not supported in SIMD-only mode");
12751}
12752
12753void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12754 SourceLocation Loc,
12755 const OMPTaskDataTy &Data) {
12756 llvm_unreachable("Not supported in SIMD-only mode");
12757}
12758
12759void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12760 CodeGenFunction &CGF, SourceLocation Loc,
12761 OpenMPDirectiveKind CancelRegion) {
12762 llvm_unreachable("Not supported in SIMD-only mode");
12763}
12764
12765void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12766 SourceLocation Loc, const Expr *IfCond,
12767 OpenMPDirectiveKind CancelRegion) {
12768 llvm_unreachable("Not supported in SIMD-only mode");
12769}
12770
12771void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12772 const OMPExecutableDirective &D, StringRef ParentName,
12773 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12774 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12775 llvm_unreachable("Not supported in SIMD-only mode");
12776}
12777
12778void CGOpenMPSIMDRuntime::emitTargetCall(
12779 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12780 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12781 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12782 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12783 const OMPLoopDirective &D)>
12784 SizeEmitter) {
12785 llvm_unreachable("Not supported in SIMD-only mode");
12786}
12787
12788bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12789 llvm_unreachable("Not supported in SIMD-only mode");
12790}
12791
12792bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12793 llvm_unreachable("Not supported in SIMD-only mode");
12794}
12795
12796bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12797 return false;
12798}
12799
12800void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12801 const OMPExecutableDirective &D,
12802 SourceLocation Loc,
12803 llvm::Function *OutlinedFn,
12804 ArrayRef<llvm::Value *> CapturedVars) {
12805 llvm_unreachable("Not supported in SIMD-only mode");
12806}
12807
12808void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12809 const Expr *NumTeams,
12810 const Expr *ThreadLimit,
12811 SourceLocation Loc) {
12812 llvm_unreachable("Not supported in SIMD-only mode");
12813}
12814
12815void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12816 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12817 const Expr *Device, const RegionCodeGenTy &CodeGen,
12818 CGOpenMPRuntime::TargetDataInfo &Info) {
12819 llvm_unreachable("Not supported in SIMD-only mode");
12820}
12821
12822void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12823 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12824 const Expr *Device) {
12825 llvm_unreachable("Not supported in SIMD-only mode");
12826}
12827
12828void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12829 const OMPLoopDirective &D,
12830 ArrayRef<Expr *> NumIterations) {
12831 llvm_unreachable("Not supported in SIMD-only mode");
12832}
12833
12834void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12835 const OMPDependClause *C) {
12836 llvm_unreachable("Not supported in SIMD-only mode");
12837}
12838
12839void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12840 const OMPDoacrossClause *C) {
12841 llvm_unreachable("Not supported in SIMD-only mode");
12842}
12843
12844const VarDecl *
12845CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12846 const VarDecl *NativeParam) const {
12847 llvm_unreachable("Not supported in SIMD-only mode");
12848}
12849
12850Address
12851CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12852 const VarDecl *NativeParam,
12853 const VarDecl *TargetParam) const {
12854 llvm_unreachable("Not supported in SIMD-only mode");
12855}
#define V(N, I)
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static llvm::Value * emitDynCGGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
Expr::Classification Cl
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:220
SourceManager & getSourceManager()
Definition ASTContext.h:833
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:926
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5265
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3722
Attr - This represents one attribute.
Definition Attr.h:44
Represents a C++ constructor within a class.
Definition DeclCXX.h:2604
Represents a C++ destructor within a class.
Definition DeclCXX.h:2869
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2255
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2279
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1784
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1097
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2121
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3899
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3933
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1349
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:3939
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3927
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3930
This captures a statement into a function.
Definition Stmt.h:3886
const Capture * const_capture_iterator
Definition Stmt.h:4020
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4037
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:4007
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:3990
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1475
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4032
capture_range captures()
Definition Stmt.h:4024
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:140
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:296
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:207
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:245
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:112
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:369
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:282
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
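The lowering contract behind emitSingleRegion is small: exactly one thread receives a nonzero result from __kmpc_single and must pair it with __kmpc_end_single. A minimal sketch against the libomp entry points (signatures per kmp.h; singleRegion is an illustrative name, and copyprivate handling via __kmpc_copyprivate is omitted):
extern "C" {
struct ident_t;
int __kmpc_single(ident_t *loc, int gtid);      // returns 1 in one thread only
void __kmpc_end_single(ident_t *loc, int gtid); // paired with a 1 result
}

void singleRegion(ident_t *loc, int gtid, void (*bodyGen)()) {
  if (__kmpc_single(loc, gtid)) {
    bodyGen(); // the single-region body
    __kmpc_end_single(loc, gtid);
  }
}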
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
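The ident_t objects produced here mirror the libomp layout; a descriptive sketch of the struct as declared in kmp.h (field comments cross-reference other entries on this page; not a drop-in header):
struct ident_t {
  int reserved_1;      // unused
  int flags;           // KMP_IDENT_* bits, cf. getDefaultFlagsForBarriers
  int reserved_2;      // target-specific, cf. getDefaultLocationReserved2Flags
  int reserved_3;      // unused
  const char *psource; // ";file;function;line;column;;" when EmitLoc is set
};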
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit code for initialization of a threadprivate variable.
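For a threadprivate variable with non-trivial construction or destruction, the definition is registered with the runtime once; a compilable sketch of that registration against the libomp entry point (callback typedefs and signature per kmp.h; gv and the callbacks are illustrative):
extern "C" {
struct ident_t;
typedef void *(*kmpc_ctor)(void *);
typedef void *(*kmpc_cctor)(void *, void *);
typedef void (*kmpc_dtor)(void *);
void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
                                   kmpc_cctor cctor, kmpc_dtor dtor);
}

int gv; // the threadprivate global

void *gvCtor(void *p) { *static_cast<int *>(p) = 42; return p; }
void gvDtor(void *) {}

void registerGV(ident_t *loc) {
  __kmpc_threadprivate_register(loc, &gv, gvCtor, /*cctor=*/nullptr, gvDtor);
}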
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void *shareds; /**< pointer to block of pointers to shared vars */ k...
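A hedged reconstruction of the kmp_task_t layout the truncated comment above describes (the tail of the struct carries optional compiler data such as the destructor thunk and priority, elided here):
typedef int (*kmp_routine_entry_t)(int gtid, void *task); // kmp_int32 (*)(kmp_int32, void *)
typedef struct kmp_task {
  void *shareds;               // pointer to block of pointers to shared vars
  kmp_routine_entry_t routine; // entry point executing the task body
  int part_id;                 // part id for the task
  // ... optional kmp_cmplrdata_t fields (destructors, priority) follow
} kmp_task_t;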
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument based on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit code for initialization of the task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive: extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
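emitBarrierCall chooses between a plain barrier and a cancellation-aware one; a sketch of the two libomp entry points and the check the EmitChecks path implies (signatures per kmp.h; barrierWithCancelCheck is an illustrative name):
extern "C" {
struct ident_t;
void __kmpc_barrier(ident_t *loc, int gtid);
int __kmpc_cancel_barrier(ident_t *loc, int gtid); // nonzero => cancelled
}

bool barrierWithCancelCheck(ident_t *loc, int gtid, bool emitChecks) {
  if (!emitChecks) {
    __kmpc_barrier(loc, gtid);
    return false;
  }
  // In a cancellable region the result must be checked so the thread can
  // branch to the cancellation exit.
  return __kmpc_cancel_barrier(loc, gtid) != 0;
}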
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
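A simplified sketch of the branching pattern emitIfClause follows, expressed with the CodeGenFunction primitives listed later on this page (the real implementation first tries ConstantFoldsToSimpleInteger to drop the dead arm; this is in-tree code, not a standalone program):
void emitIfClauseSketch(CodeGenFunction &CGF, const Expr *Cond,
                        const RegionCodeGenTy &ThenGen,
                        const RegionCodeGenTy &ElseGen) {
  llvm::BasicBlock *ThenBB = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBB = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *EndBB  = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBB, ElseBB, /*TrueCount=*/0);
  CGF.EmitBlock(ThenBB);
  ThenGen(CGF); // emit the parallel/offloaded version
  CGF.EmitBranch(EndBB);
  CGF.EmitBlock(ElseBB);
  ElseGen(CGF); // emit the serial fallback
  CGF.EmitBranch(EndBB);
  CGF.EmitBlock(EndBB, /*IsFinished=*/true);
}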
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc)
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
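The lock object this returns is a zero-initialized kmp_critical_name (kmp_int32[8], see the KmpCriticalNameTy entry below) named ".gomp_critical_user_<name>.var". A sketch of what emitCriticalRegion lowers to around it (signatures per kmp.h; a 'hint' clause switches the entry call to __kmpc_critical_with_hint):
extern "C" {
struct ident_t;
typedef int kmp_critical_name[8];
void __kmpc_critical(ident_t *loc, int gtid, kmp_critical_name *lck);
void __kmpc_end_critical(ident_t *loc, int gtid, kmp_critical_name *lck);
}

kmp_critical_name Lock = {}; // ".gomp_critical_user_<name>.var"

void criticalRegion(ident_t *loc, int gtid, void (*body)()) {
  __kmpc_critical(loc, gtid, &Lock);
  body(); // the critical-region body
  __kmpc_end_critical(loc, gtid, &Lock);
}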
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
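A reconstruction of the truncated kmp_dim comment above, plus the doacross initialization call emitDoacrossInit issues for the loop nest (layout and signature per kmp.h; a sketch):
typedef long long kmp_int64;
struct kmp_dim {  // loop bounds info, casted to kmp_int64
  kmp_int64 lo;   // lower bound
  kmp_int64 up;   // upper bound
  kmp_int64 st;   // stride
};
extern "C" {
struct ident_t;
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims);
}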
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit code for the reduction clause.
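The runtime protocol behind emitReduction is a three-way switch on the result of __kmpc_reduce; a sketch against the libomp entry points (signatures per kmp.h; reduceSketch and combiner are illustrative names, and the nowait variants __kmpc_reduce_nowait/__kmpc_end_reduce_nowait follow the same shape):
#include <cstddef>

extern "C" {
struct ident_t;
typedef int kmp_critical_name[8];
int __kmpc_reduce(ident_t *loc, int gtid, int num_vars,
                  std::size_t reduce_size, void *reduce_data,
                  void (*reduce_func)(void *lhs, void *rhs),
                  kmp_critical_name *lck);
void __kmpc_end_reduce(ident_t *loc, int gtid, kmp_critical_name *lck);
}

void reduceSketch(ident_t *loc, int gtid, void *data,
                  void (*combiner)(void *lhs, void *rhs),
                  kmp_critical_name *lck) {
  switch (__kmpc_reduce(loc, gtid, /*num_vars=*/1, /*reduce_size=*/sizeof(int),
                        data, combiner, lck)) {
  case 1: // tree/critical path: this thread combines, then ends the reduction
    combiner(data, data);
    __kmpc_end_reduce(loc, gtid, lck);
    break;
  case 2: // atomic path: combine with atomic ops instead of reduce_func
    break;
  default: // 0: nothing to do in this thread
    break;
  }
}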
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
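A hedged reconstruction of the kmp_depend_info layout in the truncated comment above (newer runtimes add further flag bits, e.g. for mutexinoutset dependencies; sketch only):
#include <cstddef>

typedef long kmp_intptr_t; // kmp.h picks a pointer-sized integer per target
typedef struct kmp_depend_info {
  kmp_intptr_t base_addr; // address the dependency is tracked on
  std::size_t len;        // length of the tracked memory
  struct {
    bool in : 1;
    bool out : 1;
  } flags;
} kmp_depend_info_t;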
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for initialization of a threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit code for the reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for initialization of the task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3111
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2394
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null, If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1482
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2278
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3120
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2251
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5333
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignedTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:174
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:242
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2417
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:4916
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:223
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5507
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2202
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2614
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3130
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:293
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1596
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:675
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:186
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
Definition CGExpr.cpp:3078
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:203
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1654
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitMemberExpr(const MemberExpr *E)
Definition CGExpr.cpp:5148
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1712
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1807
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:655
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2092
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2944
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition CGCall.cpp:1701
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:739
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:639
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375
LValue - This represents an lvalue reference.
Definition CGValue.h:182
CharUnits getAlignment() const
Definition CGValue.h:343
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:338
Address getAddress() const
Definition CGValue.h:361
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:346
QualType getType() const
Definition CGValue.h:291
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:335
A base class for pre/post-actions in advanced codegen sequences for OpenMP regions.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:98
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:71
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used instead of the original variable address in normal operations.
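A plausible driving sequence for ReductionCodeGen, mirroring how the entries above fit together per reduction item N (an in-tree sketch, not a verbatim excerpt; Shareds/Origs/Privates/ReductionOps, Count, PrivateAddr, SharedAddr, and DefaultInit stand in for values the caller already has):
ReductionCodeGen RCG(Shareds, Origs, Privates, ReductionOps);
for (unsigned N = 0; N < Count; ++N) {
  RCG.emitSharedOrigLValue(CGF, N); // lvalues for the shared/original items
  RCG.emitAggregateType(CGF, N);    // sizes for variably-modified types
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedAddr, DefaultInit);
  if (RCG.needCleanups(N))
    RCG.emitCleanups(CGF, N, PrivateAddr); // destroy the private copy
}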
Class that provides a way to call the simple version of codegen for an OpenMP region, or an advanced one with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1449
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1270
ValueDecl * getDecl()
Definition Expr.h:1338
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:573
bool hasAttrs() const
Definition DeclBase.h:518
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:524
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition DeclBase.h:1087
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:559
SourceLocation getLocation() const
Definition DeclBase.h:439
DeclContext * getDeclContext()
Definition DeclBase.h:448
AttrVec & getAttrs()
Definition DeclBase.h:524
bool hasAttr() const
Definition DeclBase.h:577
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:978
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition Diagnostic.h:905
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
Expr * IgnoreParenNoopCasts(const ASTContext &Ctx) LLVM_READONLY
Skip past any parentheses and casts which do not change the value (including ptr->int casts of the sa...
Definition Expr.cpp:3112
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:674
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:672
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3090
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3085
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-effects.
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than producing a value.
Definition Expr.cpp:3665
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boolean condition using any crazy technique that we want to, even if the expression has side-effects.
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic expression.
Definition Expr.cpp:273
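The Expr helpers listed above are commonly chained: strip sugar first, then attempt constant folding. A small illustrative sketch (inspectExpr is a hypothetical name):

#include "clang/AST/ASTContext.h"
#include "clang/AST/Expr.h"
#include <optional>

static void inspectExpr(const clang::Expr *E, const clang::ASTContext &Ctx) {
  const clang::Expr *Core = E->IgnoreParenImpCasts();
  if (std::optional<llvm::APSInt> V = Core->getIntegerConstantExpr(Ctx)) {
    (void)*V; // folded integer value
  } else if (!Core->HasSideEffects(Ctx)) {
    bool Cond = false;
    if (Core->EvaluateAsBooleanCondition(Cond, Ctx)) {
      // Foldable to a boolean without unmodeled side effects.
      (void)Cond;
    }
  }
}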
static bool isSameComparisonOperand(const Expr *E1, const Expr *E2)
Checks that the two Expr's will refer to the same value as a comparison operand.
Definition Expr.cpp:4296
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4030
Represents a member of a struct/union/class.
Definition Decl.h:3160
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::getFieldOffset.
Definition Decl.h:3245
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3396
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4688
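FieldDecl::Create is the usual way codegen synthesizes fields for helper records. A sketch under the assumption that RD already has an open definition; addFieldSketch is a hypothetical helper (see also addDecl, ICIS_NoInit, and AS_public elsewhere in this index).

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"

static clang::FieldDecl *addFieldSketch(clang::ASTContext &C,
                                        clang::RecordDecl *RD,
                                        clang::QualType FieldTy) {
  clang::FieldDecl *FD = clang::FieldDecl::Create(
      C, RD, clang::SourceLocation(), clang::SourceLocation(),
      /*Id=*/nullptr, FieldTy, C.getTrivialTypeSourceInfo(FieldTy),
      /*BW=*/nullptr, /*Mutable=*/false, clang::ICIS_NoInit);
  FD->setAccess(clang::AS_public);
  RD->addDecl(FD);
  return FD;
}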
Represents a function declaration or definition.
Definition Decl.h:2000
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2797
QualType getReturnType() const
Definition Decl.h:2845
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2774
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3735
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3814
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5521
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:971
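Both factory functions above follow the same Create pattern. A hedged sketch with hypothetical helper names; C is an ASTContext, DC a DeclContext.

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/Expr.h"

// Hypothetical helpers: an implicit int parameter and an int literal 0.
static clang::ImplicitParamDecl *makeImplicitIntParam(clang::ASTContext &C,
                                                      clang::DeclContext *DC) {
  return clang::ImplicitParamDecl::Create(C, DC, clang::SourceLocation(),
                                          /*Id=*/nullptr, C.IntTy,
                                          clang::ImplicitParamKind::Other);
}

static clang::IntegerLiteral *makeZeroLiteral(clang::ASTContext &C) {
  llvm::APInt Zero(C.getIntWidth(C.IntTy), 0);
  return clang::IntegerLiteral::Create(C, Zero, C.IntTy,
                                       clang::SourceLocation());
}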
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3298
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3381
Expr * getBase() const
Definition Expr.h:3375
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
bool isExternallyVisible() const
Definition Decl.h:433
This represents clause 'affinity' in the '#pragma omp task'-based directives.
ArrayRef< MappableComponent > MappableExprComponentListRef
static std::pair< const Expr *, std::optional< size_t > > findAttachPtrExpr(MappableExprComponentListRef Components, OpenMPDirectiveKind CurDirKind)
Find the attach pointer expression from a list of mappable expression components.
static QualType getComponentExprElementType(const Expr *Exp)
Get the type of an element of a ComponentList Expr Exp.
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing single OpenMP clause.
SourceLocation getBeginLoc() const
Returns the starting location of the clause.
This represents '#pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents '#pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
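The OMPDeclareReductionDecl accessors above split the construct into a combiner (over the In/Out variables) and an optional initializer (over Orig/Priv). A sketch of walking one; walkDeclareReduction is a hypothetical name.

#include "clang/AST/DeclOpenMP.h"

static void walkDeclareReduction(clang::OMPDeclareReductionDecl *DRD) {
  clang::Expr *Combiner = DRD->getCombiner(); // reads In/Out variables
  (void)DRD->getCombinerIn();
  (void)DRD->getCombinerOut();
  if (clang::Expr *Init = DRD->getInitializer()) {
    // Orig/Priv are only meaningful when an initializer is present.
    (void)DRD->getInitOrig();
    (void)DRD->getInitPriv();
    (void)DRD->getInitializerKind(); // call-, direct-, or copy-init
    (void)Init;
  }
  (void)Combiner;
}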
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This represents 'if' clause in the '#pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
This represents clause 'in_reduction' in the '#pragma omp task' directives.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5446
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents the 'message' clause in the '#pragma omp error' and the '#pragma omp parallel' directives.
Expr * getMessageString() const
Returns message string of the clause.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents the 'severity' clause in the '#pragma omp error' and the '#pragma omp parallel' directives.
OpenMPSeverityClauseKind getSeverityKind() const
Returns kind of the clause.
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1178
Represents a parameter to a function.
Definition Decl.h:1790
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3328
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1172
QualType withRestrict() const
Definition TypeBase.h:1175
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8294
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8334
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const int").
Definition TypeBase.h:8479
QualType getCanonicalType() const
Definition TypeBase.h:8346
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1545
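A small sketch combining the QualType queries above to ask whether a type needs non-trivial cleanup; needsNonTrivialCleanup is a hypothetical helper.

#include "clang/AST/Type.h"

static bool needsNonTrivialCleanup(clang::QualType T) {
  if (T.isNull())
    return false;
  clang::QualType Canon = T.getNonReferenceType().getCanonicalType();
  // A nonzero DestructionKind means cleanup work (e.g. a C++ destructor).
  return Canon.isDestructedType() != clang::QualType::DK_none;
}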
Represents a struct/union/class.
Definition Decl.h:4312
field_iterator field_end() const
Definition Decl.h:4518
field_range fields() const
Definition Decl.h:4515
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5217
bool field_empty() const
Definition Decl.h:4523
field_iterator field_begin() const
Definition Decl.cpp:5201
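The RecordDecl entries above describe a strict lifecycle: startDefinition, add members, completeDefinition, then query. A sketch; buildAndWalkRecord is hypothetical and FD is assumed to come from FieldDecl::Create.

#include "clang/AST/Decl.h"

static void buildAndWalkRecord(clang::RecordDecl *RD, clang::FieldDecl *FD) {
  RD->startDefinition();
  RD->addDecl(FD);          // FD created via FieldDecl::Create elsewhere
  RD->completeDefinition(); // layout may now be queried
  if (!RD->field_empty())
    for (const clang::FieldDecl *Field : RD->fields())
      (void)Field->getFieldIndex();
}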
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
Stmt - This represents one statement.
Definition Stmt.h:85
child_range children()
Definition Stmt.cpp:295
StmtClass getStmtClass() const
Definition Stmt.h:1472
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpreted by SourceManager.
Definition Stmt.cpp:334
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top, if IgnoreCaptured is true.
Definition Stmt.cpp:205
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:346
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4894
bool isUnion() const
Definition Decl.h:3922
The base class of the type hierarchy.
Definition TypeBase.h:1833
bool isVoidType() const
Definition TypeBase.h:8887
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition TypeBase.h:9067
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], or an enum decl which has a signed representation.
Definition Type.cpp:2205
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or because it is the injected-class-name type of a class template or class template partial specialization.
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8630
bool isPointerType() const
Definition TypeBase.h:8531
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8931
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9174
bool isReferenceType() const
Definition TypeBase.h:8555
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:752
bool isLValueReferenceType() const
Definition TypeBase.h:8559
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2411
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3119
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9060
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2800
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9160
bool isFloatingType() const
Definition Type.cpp:2304
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true for _Bool], or an enum decl which has an unsigned representation.
Definition Type.cpp:2253
bool isAnyPointerType() const
Definition TypeBase.h:8539
const T * getAs() const
Member-template getAs<specific type>.
Definition TypeBase.h:9107
bool isRecordType() const
Definition TypeBase.h:8658
bool isUnionType() const
Definition Type.cpp:718
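A sketch chaining several of the Type predicates above to peel a pointer, reference, or array down to an element type; underlyingElementType is a hypothetical helper.

#include "clang/AST/Type.h"

static clang::QualType underlyingElementType(clang::QualType T) {
  const clang::Type *Ty = T.getTypePtr();
  if (Ty->isAnyPointerType())
    return Ty->getPointeeType();
  if (Ty->isReferenceType())
    return T.getNonReferenceType();
  if (Ty->isArrayType())
    return clang::QualType(Ty->getBaseElementTypeUnsafe(), 0);
  return T;
}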
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it is a function designator), or an enum constant.
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:926
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2257
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2366
const Expr * getInit() const
Definition Decl.h:1368
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1217
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1295
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2375
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1262
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1358
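A sketch combining the VarDecl queries above, e.g. when deciding whether a captured global has a usable definition; isDefinedWithInit is a hypothetical helper.

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"

static bool isDefinedWithInit(clang::VarDecl *VD, clang::ASTContext &Ctx) {
  if (VD->hasExternalStorage() &&
      VD->hasDefinition(Ctx) == clang::VarDecl::DeclarationOnly)
    return false; // extern declaration with no definition in this TU
  return VD->getAnyInitializer() != nullptr;
}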
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:3966
Expr * getSizeExpr() const
Definition TypeBase.h:3980
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are of a specific type.
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRecordForLayout) and fields defined within it (per isEmptyFieldForLayout).
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:154
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of a local variable or function.
Definition CGValue.h:145
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field or a field of an empty record (per isEmptyRecordForLayout).
ComparisonResult
Indicates the result of a tentative comparison.
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
bool isa(CodeGen::Address addr)
Definition Address.h:330
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
@ Conditional
A conditional (?:) operator.
Definition Sema.h:667
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constructs.
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:124
This class represents a compute construct, representing a 'Kind' of 'parallel', 'serial', or 'kernels'.
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Expr * Cond
static bool classof(const Stmt *T)
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop, master taskloop, parallel master taskloop, master taskloop simd, or parallel master taskloop simd.
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
const FunctionProtoType * T
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:5898
bool isOpenMPTargetMapEnteringDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a map-entering target directive.
@ Type
The name was classified as a type.
Definition Sema.h:562
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
@ OMPC_SEVERITY_unknown
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:139
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
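A sketch pairing getOpenMPCaptureRegions with the isOpenMP*Directive predicates scattered through this index; countCaptureRegions is a hypothetical helper.

#include "clang/Basic/OpenMPKinds.h"
#include "llvm/ADT/SmallVector.h"

static unsigned countCaptureRegions(clang::OpenMPDirectiveKind DKind) {
  llvm::SmallVector<clang::OpenMPDirectiveKind, 4> CaptureRegions;
  clang::getOpenMPCaptureRegions(CaptureRegions, DKind);
  if (clang::isOpenMPParallelDirective(DKind) ||
      clang::isOpenMPTeamsDirective(DKind)) {
    // parallel/teams-kind directives outline their bodies into
    // separate capture regions
  }
  return CaptureRegions.size();
}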
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1746
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
#define false
Definition stdbool.h:26
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because of the lastprivate conditional.
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, necessary to generate the static_chunked scheduled loop.
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
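The fields above (IVSize, IVSigned, Ordered, IL, LB, UB, ST, Chunk) belong to CGOpenMPRuntime::StaticRTInput. A hedged sketch of packing them for an emitForStaticInit-style call; the constructor parameter order follows the field order listed here and should be treated as an assumption.

#include "CGOpenMPRuntime.h"
using namespace clang::CodeGen;

// Assumed order: size/flags first, then the four output addresses (last
// iteration flag, lower bound, upper bound, stride), then the chunk value.
static CGOpenMPRuntime::StaticRTInput
makeStaticInput(unsigned IVSize, bool IVSigned, bool Ordered, Address IL,
                Address LB, Address UB, Address ST,
                llvm::Value *Chunk = nullptr) {
  return CGOpenMPRuntime::StaticRTInput(IVSize, IVSigned, Ordered, IL, LB, UB,
                                        ST, Chunk);
}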
A jump destination is an abstract label, branching to which may require a jump out through normal cleanups.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:645
Extra information about a function prototype.
Definition TypeBase.h:5351
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition ExprOpenMP.h:121
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Scheduling data for loop-based OpenMP directives.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
Describes how types, statements, expressions, and declarations should be printed.