//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };
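
  // Editor's illustration (not part of the upstream source): assuming typical
  // clang codegen, directives select these region kinds roughly as follows:
  //   #pragma omp parallel -> ParallelOutlinedRegion (body outlined into a
  //                           helper invoked via __kmpc_fork_call)
  //   #pragma omp task     -> TaskOutlinedRegion (body outlined into a task
  //                           entry function)
  //   #pragma omp critical -> InlinedRegion (body emitted inline, bracketed
  //                           by runtime calls)
  //   #pragma omp target   -> TargetRegion (body outlined as an offload
  //                           entry point)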

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up in a
    // list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
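
// Editor's sketch (not upstream code): a minimal usage pattern for the RAII
// helper above, e.g. when emitting an inlined 'omp single' region:
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_single,
//                                    /*HasCancel=*/false);
//     CodeGen(CGF); // emits the captured body inline
//   } // destructor restores the previous CapturedStmtInfo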

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
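
// Editor's note (illustration only): these flags are OR-ed into the ident_t
// 'flags' field. For example, the implicit barrier at the end of a
// worksharing 'for' would carry
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR  (0x02 | 0x40 == 0x42).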

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
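
// Editor's note (illustration only): as built by
// getIdentStringFromSourceLocation() below, a psource string for a construct
// at line 10, column 1 of test.c in function 'foo' looks like
//   ";test.c;foo;10;1;;"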

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
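
// Editor's note (illustration only): typical clause-to-enum mappings are
//   schedule(static)      -> OMP_sch_static (34)
//   schedule(static, 4)   -> OMP_sch_static_chunked (33)
//   schedule(dynamic)     -> OMP_sch_dynamic_chunked (35)
//   schedule(nonmonotonic: dynamic)
//       -> OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic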

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
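
// Editor's note (illustration only): given a user-defined reduction such as
//   #pragma omp declare reduction(merge : T : omp_out = f(omp_out, omp_in))
// a 'reduction(merge : x)' clause yields a ReductionOp call whose callee is an
// OpaqueValueExpr wrapping a DeclRefExpr to the OMPDeclareReductionDecl, which
// is exactly what the helper above unwraps.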

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
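
// Editor's sketch (illustration only) of the control flow emitted by
// EmitOMPAggregateInit for a destination array [DestBegin, DestEnd):
//
//   entry:               isempty = (DestBegin == DestEnd)
//                        br isempty, omp.arrayinit.done, omp.arrayinit.body
//   omp.arrayinit.body:  PHI over the current element, per-element init,
//                        advance pointers, branch back until DestEnd
//   omp.arrayinit.done:  continue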

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  Config.setDefaultTargetAS(
      CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));
  Config.setRuntimeCC(CGM.getRuntimeCC());

  OMPBuilder.setConfig(Config);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
                                     CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
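
// Editor's note (illustration only): for
//   #pragma omp declare reduction(+ : MyTy : omp_out += omp_in)
// the combiner built above is roughly
//   static void .omp_combiner.(MyTy *restrict omp_out, MyTy *restrict omp_in) {
//     *omp_out += *omp_in;
//   }
// with omp_in/omp_out privatized to the two restrict-qualified parameters.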

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
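
// Editor's note (illustration only): on the non-OpenMPIRBuilder path this
// typically materializes one cached runtime call at the service insertion
// point, e.g.
//   %0 = call i32 @__kmpc_global_thread_num(ptr @.loc)
// and every later getThreadID query in the function reuses that value.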

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
    for (const auto *D : I->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(I);
  }
  if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
    break;
  }
}

static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
    break;
  }
}
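
// Editor's note (illustration only): typical attribute-to-entry-kind mappings
// for the two helpers above:
//   #pragma omp declare target to(x)    -> OMPTargetGlobalVarEntryTo
//   #pragma omp declare target enter(x) -> OMPTargetGlobalVarEntryEnter
//   #pragma omp declare target link(y)  -> OMPTargetGlobalVarEntryLink
//   device_type(nohost)                 -> OMPTargetDeviceClauseNoHost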
1538
1539static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1540 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1541 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1542
1543 auto FileInfoCallBack = [&]() {
1545 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1546
1547 if (!CGM.getFileSystem()->exists(PLoc.getFilename()))
1548 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1549
1550 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1551 };
1552
1553 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack,
1554 *CGM.getFileSystem(), ParentName);
1555}
1556
1557ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1558 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1559
1560 auto LinkageForVariable = [&VD, this]() {
1561 return CGM.getLLVMLinkageVarDefinition(VD);
1562 };
1563
1564 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1565
1566 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1567 CGM.getContext().getPointerType(VD->getType()));
1568 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1569 convertCaptureClause(VD), convertDeviceClause(VD),
1570 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1571 VD->isExternallyVisible(),
1572 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1573 VD->getCanonicalDecl()->getBeginLoc()),
1574 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1575 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1576 LinkageForVariable);
1577
1578 if (!addr)
1579 return ConstantAddress::invalid();
1580 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1581}
1582
1583llvm::Constant *
1584CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1585 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1586 !CGM.getContext().getTargetInfo().isTLSSupported());
1587 // Lookup the entry, lazily creating it if necessary.
1588 std::string Suffix = getName({"cache", ""});
1589 return OMPBuilder.getOrCreateInternalVariable(
1590 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1591}
1592
1593Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1594 const VarDecl *VD,
1595 Address VDAddr,
1596 SourceLocation Loc) {
1597 if (CGM.getLangOpts().OpenMPUseTLS &&
1598 CGM.getContext().getTargetInfo().isTLSSupported())
1599 return VDAddr;
1600
1601 llvm::Type *VarTy = VDAddr.getElementType();
1602 llvm::Value *Args[] = {
1603 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1604 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1605 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1606 getOrCreateThreadPrivateCache(VD)};
1607 return Address(
1608 CGF.EmitRuntimeCall(
1609 OMPBuilder.getOrCreateRuntimeFunction(
1610 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1611 Args),
1612 CGF.Int8Ty, VDAddr.getAlignment());
1613}
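// Illustrative lowering (the cache symbol name is a sketch, not the exact
// mangling): for
//   static int x;
//   #pragma omp threadprivate(x)
// each non-TLS access to 'x' becomes
//   void *p = __kmpc_threadprivate_cached(&loc, gtid, &x, sizeof(x), &x$cache);
// so every thread transparently addresses its own copy.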
1614
1615void CGOpenMPRuntime::emitThreadPrivateVarInit(
1616 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1617 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1618 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1619 // library.
1620 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1621 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1622 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1623 OMPLoc);
1624 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1625 // to register constructor/destructor for variable.
1626 llvm::Value *Args[] = {
1627 OMPLoc,
1628 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1629 Ctor, CopyCtor, Dtor};
1630 CGF.EmitRuntimeCall(
1631 OMPBuilder.getOrCreateRuntimeFunction(
1632 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1633 Args);
1634}
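// Illustrative call sequence produced by the helper above (sketch):
//   __kmpc_global_thread_num(&loc);                    // init OpenMP runtime
//   __kmpc_threadprivate_register(&loc, &var, ctor, /*cctor=*/NULL, dtor);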
1635
1636llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1637 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1638 bool PerformInit, CodeGenFunction *CGF) {
1639 if (CGM.getLangOpts().OpenMPUseTLS &&
1640 CGM.getContext().getTargetInfo().isTLSSupported())
1641 return nullptr;
1642
1643 VD = VD->getDefinition(CGM.getContext());
1644 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1645 QualType ASTTy = VD->getType();
1646
1647 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1648 const Expr *Init = VD->getAnyInitializer();
1649 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1650 // Generate function that re-emits the declaration's initializer into the
1651 // threadprivate copy of the variable VD
1652 CodeGenFunction CtorCGF(CGM);
1653 FunctionArgList Args;
1654 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1655 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1656 ImplicitParamKind::Other);
1657 Args.push_back(&Dst);
1658
1659 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1660 CGM.getContext().VoidPtrTy, Args);
1661 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1662 std::string Name = getName({"__kmpc_global_ctor_", ""});
1663 llvm::Function *Fn =
1664 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1665 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1666 Args, Loc, Loc);
1667 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1668 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1669 CGM.getContext().VoidPtrTy, Dst.getLocation());
1670 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1671 VDAddr.getAlignment());
1672 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1673 /*IsInitializer=*/true);
1674 ArgVal = CtorCGF.EmitLoadOfScalar(
1675 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1676 CGM.getContext().VoidPtrTy, Dst.getLocation());
1677 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1678 CtorCGF.FinishFunction();
1679 Ctor = Fn;
1680 }
1681 if (VD->getType().isDestructedType()) {
1682 // Generate function that emits destructor call for the threadprivate copy
1683 // of the variable VD
1684 CodeGenFunction DtorCGF(CGM);
1685 FunctionArgList Args;
1686 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1687 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1688 ImplicitParamKind::Other);
1689 Args.push_back(&Dst);
1690
1691 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1692 CGM.getContext().VoidTy, Args);
1693 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1694 std::string Name = getName({"__kmpc_global_dtor_", ""});
1695 llvm::Function *Fn =
1696 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1697 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1698 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1699 Loc, Loc);
1700 // Create a scope with an artificial location for the body of this function.
1701 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1702 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1703 DtorCGF.GetAddrOfLocalVar(&Dst),
1704 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1705 DtorCGF.emitDestroy(
1706 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1707 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1708 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1709 DtorCGF.FinishFunction();
1710 Dtor = Fn;
1711 }
1712 // Do not emit init function if it is not required.
1713 if (!Ctor && !Dtor)
1714 return nullptr;
1715
1716 // Copying constructor for the threadprivate variable.
1717 // Must be NULL: the parameter is reserved by the runtime, which currently
1718 // requires it to always be NULL; otherwise it fires an assertion.
1719 CopyCtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1720 if (Ctor == nullptr) {
1721 Ctor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1722 }
1723 if (Dtor == nullptr) {
1724 Dtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1725 }
1726 if (!CGF) {
1727 auto *InitFunctionTy =
1728 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1729 std::string Name = getName({"__omp_threadprivate_init_", ""});
1730 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1731 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1732 CodeGenFunction InitCGF(CGM);
1733 FunctionArgList ArgList;
1734 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1735 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1736 Loc, Loc);
1737 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1738 InitCGF.FinishFunction();
1739 return InitFunction;
1740 }
1741 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1742 }
1743 return nullptr;
1744}
1745
1746void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1747 llvm::GlobalValue *GV) {
1748 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1749 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1750
1751 // We only need to handle active 'indirect' declare target functions.
1752 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1753 return;
1754
1755 // Get a mangled name to store the new device global in.
1756 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1757 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1758 SmallString<128> Name;
1759 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1760
1761 // We need to generate a new global to hold the address of the indirectly
1762 // called device function. Doing this allows us to keep the visibility and
1763 // linkage of the associated function unchanged while allowing the runtime to
1764 // access its value.
1765 llvm::GlobalValue *Addr = GV;
1766 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1767 llvm::PointerType *FnPtrTy = llvm::PointerType::get(
1768 CGM.getLLVMContext(),
1769 CGM.getModule().getDataLayout().getProgramAddressSpace());
1770 Addr = new llvm::GlobalVariable(
1771 CGM.getModule(), FnPtrTy,
1772 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1773 nullptr, llvm::GlobalValue::NotThreadLocal,
1774 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1775 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1776 }
1777
1778 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1779 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1780 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1781 llvm::GlobalValue::WeakODRLinkage);
1782}
1783
1784Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1785 QualType VarType,
1786 StringRef Name) {
1787 std::string Suffix = getName({"artificial", ""});
1788 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1789 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1790 VarLVType, Twine(Name).concat(Suffix).str());
1791 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1792 CGM.getTarget().isTLSSupported()) {
1793 GAddr->setThreadLocal(/*Val=*/true);
1794 return Address(GAddr, GAddr->getValueType(),
1795 CGM.getContext().getTypeAlignInChars(VarType));
1796 }
1797 std::string CacheSuffix = getName({"cache", ""});
1798 llvm::Value *Args[] = {
1799 emitUpdateLocation(CGF, SourceLocation()),
1800 getThreadID(CGF, SourceLocation()),
1801 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1802 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1803 /*isSigned=*/false),
1804 OMPBuilder.getOrCreateInternalVariable(
1805 CGM.VoidPtrPtrTy,
1806 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1807 return Address(
1808 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1809 CGF.EmitRuntimeCall(
1810 OMPBuilder.getOrCreateRuntimeFunction(
1811 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1812 Args),
1813 CGF.Builder.getPtrTy(0)),
1814 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1815}
1816
1817void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1818 const RegionCodeGenTy &ThenGen,
1819 const RegionCodeGenTy &ElseGen) {
1820 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1821
1822 // If the condition constant folds and can be elided, try to avoid emitting
1823 // the condition and the dead arm of the if/else.
1824 bool CondConstant;
1825 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1826 if (CondConstant)
1827 ThenGen(CGF);
1828 else
1829 ElseGen(CGF);
1830 return;
1831 }
1832
1833 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1834 // emit the conditional branch.
1835 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1836 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1837 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1838 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1839
1840 // Emit the 'then' code.
1841 CGF.EmitBlock(ThenBlock);
1842 ThenGen(CGF);
1843 CGF.EmitBranch(ContBlock);
1844 // Emit the 'else' code if present.
1845 // There is no need to emit a line number for the unconditional branch.
1846 (void)ApplyDebugLocation::CreateEmpty(CGF);
1847 CGF.EmitBlock(ElseBlock);
1848 ElseGen(CGF);
1849 // There is no need to emit a line number for the unconditional branch.
1850 (void)ApplyDebugLocation::CreateEmpty(CGF);
1851 CGF.EmitBranch(ContBlock);
1852 // Emit the continuation block for code after the if.
1853 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1854}
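// For example (sketch): '#pragma omp parallel if(0)' constant-folds here, so
// only ElseGen (the serialized path) is emitted and no omp_if.then/omp_if.else
// blocks are created; an 'if' clause with a runtime-only condition produces
// the full three-block diamond above.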
1855
1856void CGOpenMPRuntime::emitParallelCall(
1857 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1858 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1859 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1860 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1861 if (!CGF.HaveInsertPoint())
1862 return;
1863 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1864 auto &M = CGM.getModule();
1865 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1866 this](CodeGenFunction &CGF, PrePostActionTy &) {
1867 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1868 llvm::Value *Args[] = {
1869 RTLoc,
1870 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1871 OutlinedFn};
1872 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1873 RealArgs.append(std::begin(Args), std::end(Args));
1874 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1875
1876 llvm::FunctionCallee RTLFn =
1877 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1878 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1879 };
1880 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1881 this](CodeGenFunction &CGF, PrePostActionTy &) {
1882 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1883 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1884 // Build calls:
1885 // __kmpc_serialized_parallel(&Loc, GTid);
1886 llvm::Value *Args[] = {RTLoc, ThreadID};
1887 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1888 M, OMPRTL___kmpc_serialized_parallel),
1889 Args);
1890
1891 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1892 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1893 RawAddress ZeroAddrBound =
1894 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1895 /*Name=*/".bound.zero.addr");
1896 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1897 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1898 // ThreadId for serialized parallels is 0.
1899 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1900 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1901 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1902
1903 // Ensure we do not inline the function. This is trivially true for the ones
1904 // passed to __kmpc_fork_call, but the ones called in serialized regions
1905 // could be inlined. This is not perfect, but it is closer to the invariant
1906 // we want, namely, every data environment starts with a new function.
1907 // TODO: We should pass the if condition to the runtime function and do the
1908 // handling there. Much cleaner code.
1909 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1910 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1911 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1912
1913 // __kmpc_end_serialized_parallel(&Loc, GTid);
1914 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1915 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1916 M, OMPRTL___kmpc_end_serialized_parallel),
1917 EndArgs);
1918 };
1919 if (IfCond) {
1920 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1921 } else {
1922 RegionCodeGenTy ThenRCG(ThenGen);
1923 ThenRCG(CGF);
1924 }
1925}
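// Illustrative shape of both paths for '#pragma omp parallel if(cond)' with a
// single captured variable 'a' (pseudo-code, assumed names):
//   if (cond) {
//     __kmpc_fork_call(&loc, /*nargs=*/1, &.omp_outlined., &a);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     .omp_outlined.(&gtid, &.bound.zero.addr, &a);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }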
1926
1927// If we're inside an (outlined) parallel region, use the region info's
1928// thread-ID variable (it is passed as the first argument of the outlined
1929// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1930// region but in a regular serial code region, get the thread ID by calling
1931// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1932// temporary and return the address of that temporary.
1933Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1934 SourceLocation Loc) {
1935 if (auto *OMPRegionInfo =
1936 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1937 if (OMPRegionInfo->getThreadIDVariable())
1938 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1939
1940 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1941 QualType Int32Ty =
1942 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1943 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1944 CGF.EmitStoreOfScalar(ThreadID,
1945 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1946
1947 return ThreadIDTemp;
1948}
1949
1950llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1951 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1952 std::string Name = getName({Prefix, "var"});
1953 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1954}
1955
1956namespace {
1957/// Common pre(post)-action for different OpenMP constructs.
1958class CommonActionTy final : public PrePostActionTy {
1959 llvm::FunctionCallee EnterCallee;
1960 ArrayRef<llvm::Value *> EnterArgs;
1961 llvm::FunctionCallee ExitCallee;
1962 ArrayRef<llvm::Value *> ExitArgs;
1963 bool Conditional;
1964 llvm::BasicBlock *ContBlock = nullptr;
1965
1966public:
1967 CommonActionTy(llvm::FunctionCallee EnterCallee,
1968 ArrayRef<llvm::Value *> EnterArgs,
1969 llvm::FunctionCallee ExitCallee,
1970 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1971 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1972 ExitArgs(ExitArgs), Conditional(Conditional) {}
1973 void Enter(CodeGenFunction &CGF) override {
1974 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1975 if (Conditional) {
1976 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1977 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1978 ContBlock = CGF.createBasicBlock("omp_if.end");
1979 // Generate the branch (If-stmt)
1980 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1981 CGF.EmitBlock(ThenBlock);
1982 }
1983 }
1984 void Done(CodeGenFunction &CGF) {
1985 // Emit the rest of blocks/branches
1986 CGF.EmitBranch(ContBlock);
1987 CGF.EmitBlock(ContBlock, true);
1988 }
1989 void Exit(CodeGenFunction &CGF) override {
1990 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1991 }
1992};
1993} // anonymous namespace
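// Usage sketch: with Conditional=true, CommonActionTy brackets a region as
//   if (__kmpc_master(&loc, gtid)) {   // Enter()
//     ...region body...
//     __kmpc_end_master(&loc, gtid);   // Exit()
//   }                                  // Done() emits the continuation block
// while the unconditional form (e.g. taskgroup below) simply emits the
// enter/exit calls around the body.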
1994
1995void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1996 StringRef CriticalName,
1997 const RegionCodeGenTy &CriticalOpGen,
1998 SourceLocation Loc, const Expr *Hint) {
1999 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2000 // CriticalOpGen();
2001 // __kmpc_end_critical(ident_t *, gtid, Lock);
2002 // Prepare arguments and build a call to __kmpc_critical
2003 if (!CGF.HaveInsertPoint())
2004 return;
2005 llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
2006 CGM.getModule(),
2007 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
2008 llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
2009 unsigned LockVarArgIdx = 2;
2010 if (cast<llvm::GlobalVariable>(LockVar)->getAddressSpace() !=
2011 RuntimeFcn.getFunctionType()
2012 ->getParamType(LockVarArgIdx)
2013 ->getPointerAddressSpace())
2014 LockVar = CGF.Builder.CreateAddrSpaceCast(
2015 LockVar, RuntimeFcn.getFunctionType()->getParamType(LockVarArgIdx));
2016 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2017 LockVar};
2018 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2019 std::end(Args));
2020 if (Hint) {
2021 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2022 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2023 }
2024 CommonActionTy Action(RuntimeFcn, EnterArgs,
2025 OMPBuilder.getOrCreateRuntimeFunction(
2026 CGM.getModule(), OMPRTL___kmpc_end_critical),
2027 Args);
2028 CriticalOpGen.setAction(Action);
2029 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2030}
2031
2032void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2033 const RegionCodeGenTy &MasterOpGen,
2034 SourceLocation Loc) {
2035 if (!CGF.HaveInsertPoint())
2036 return;
2037 // if(__kmpc_master(ident_t *, gtid)) {
2038 // MasterOpGen();
2039 // __kmpc_end_master(ident_t *, gtid);
2040 // }
2041 // Prepare arguments and build a call to __kmpc_master
2042 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2043 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2044 CGM.getModule(), OMPRTL___kmpc_master),
2045 Args,
2046 OMPBuilder.getOrCreateRuntimeFunction(
2047 CGM.getModule(), OMPRTL___kmpc_end_master),
2048 Args,
2049 /*Conditional=*/true);
2050 MasterOpGen.setAction(Action);
2051 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2052 Action.Done(CGF);
2053}
2054
2055void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2056 const RegionCodeGenTy &MaskedOpGen,
2057 SourceLocation Loc, const Expr *Filter) {
2058 if (!CGF.HaveInsertPoint())
2059 return;
2060 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2061 // MaskedOpGen();
2062 // __kmpc_end_masked(ident_t *, gtid);
2063 // }
2064 // Prepare arguments and build a call to __kmpc_masked
2065 llvm::Value *FilterVal = Filter
2066 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2067 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2068 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2069 FilterVal};
2070 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2071 getThreadID(CGF, Loc)};
2072 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2073 CGM.getModule(), OMPRTL___kmpc_masked),
2074 Args,
2075 OMPBuilder.getOrCreateRuntimeFunction(
2076 CGM.getModule(), OMPRTL___kmpc_end_masked),
2077 ArgsEnd,
2078 /*Conditional=*/true);
2079 MaskedOpGen.setAction(Action);
2080 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2081 Action.Done(CGF);
2082}
2083
2084void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2085 SourceLocation Loc) {
2086 if (!CGF.HaveInsertPoint())
2087 return;
2088 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2089 OMPBuilder.createTaskyield(CGF.Builder);
2090 } else {
2091 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2092 llvm::Value *Args[] = {
2093 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2094 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2095 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2096 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2097 Args);
2098 }
2099
2100 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2101 Region->emitUntiedSwitch(CGF);
2102}
2103
2104void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2105 const RegionCodeGenTy &TaskgroupOpGen,
2106 SourceLocation Loc) {
2107 if (!CGF.HaveInsertPoint())
2108 return;
2109 // __kmpc_taskgroup(ident_t *, gtid);
2110 // TaskgroupOpGen();
2111 // __kmpc_end_taskgroup(ident_t *, gtid);
2112 // Prepare arguments and build a call to __kmpc_taskgroup
2113 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2114 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2115 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2116 Args,
2117 OMPBuilder.getOrCreateRuntimeFunction(
2118 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2119 Args);
2120 TaskgroupOpGen.setAction(Action);
2121 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2122}
2123
2124/// Given an array of pointers to variables, project the address of a
2125/// given variable.
2126static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2127 unsigned Index, const VarDecl *Var) {
2128 // Pull out the pointer to the variable.
2129 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2130 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2131
2132 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2133 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2134}
2135
2136static llvm::Value *emitCopyprivateCopyFunction(
2137 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2138 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2139 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2140 SourceLocation Loc) {
2141 ASTContext &C = CGM.getContext();
2142 // void copy_func(void *LHSArg, void *RHSArg);
2143 FunctionArgList Args;
2144 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2145 ImplicitParamKind::Other);
2146 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2147 ImplicitParamKind::Other);
2148 Args.push_back(&LHSArg);
2149 Args.push_back(&RHSArg);
2150 const auto &CGFI =
2151 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2152 std::string Name =
2153 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2154 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2155 llvm::GlobalValue::InternalLinkage, Name,
2156 &CGM.getModule());
2157 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2158 Fn->setDoesNotRecurse();
2159 CodeGenFunction CGF(CGM);
2160 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2161 // Dest = (void*[n])(LHSArg);
2162 // Src = (void*[n])(RHSArg);
2163 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2164 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2165 CGF.Builder.getPtrTy(0)),
2166 ArgsElemType, CGF.getPointerAlign());
2167 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2168 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2169 CGF.Builder.getPtrTy(0)),
2170 ArgsElemType, CGF.getPointerAlign());
2171 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2172 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2173 // ...
2174 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2175 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2176 const auto *DestVar =
2177 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2178 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2179
2180 const auto *SrcVar =
2181 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2182 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2183
2184 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2185 QualType Type = VD->getType();
2186 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2187 }
2188 CGF.FinishFunction();
2189 return Fn;
2190}
2191
2192void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2193 const RegionCodeGenTy &SingleOpGen,
2194 SourceLocation Loc,
2195 ArrayRef<const Expr *> CopyprivateVars,
2196 ArrayRef<const Expr *> SrcExprs,
2197 ArrayRef<const Expr *> DstExprs,
2198 ArrayRef<const Expr *> AssignmentOps) {
2199 if (!CGF.HaveInsertPoint())
2200 return;
2201 assert(CopyprivateVars.size() == SrcExprs.size() &&
2202 CopyprivateVars.size() == DstExprs.size() &&
2203 CopyprivateVars.size() == AssignmentOps.size());
2204 ASTContext &C = CGM.getContext();
2205 // int32 did_it = 0;
2206 // if(__kmpc_single(ident_t *, gtid)) {
2207 // SingleOpGen();
2208 // __kmpc_end_single(ident_t *, gtid);
2209 // did_it = 1;
2210 // }
2211 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2212 // <copy_func>, did_it);
2213
2214 Address DidIt = Address::invalid();
2215 if (!CopyprivateVars.empty()) {
2216 // int32 did_it = 0;
2217 QualType KmpInt32Ty =
2218 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2219 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2220 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2221 }
2222 // Prepare arguments and build a call to __kmpc_single
2223 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2224 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2225 CGM.getModule(), OMPRTL___kmpc_single),
2226 Args,
2227 OMPBuilder.getOrCreateRuntimeFunction(
2228 CGM.getModule(), OMPRTL___kmpc_end_single),
2229 Args,
2230 /*Conditional=*/true);
2231 SingleOpGen.setAction(Action);
2232 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2233 if (DidIt.isValid()) {
2234 // did_it = 1;
2235 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2236 }
2237 Action.Done(CGF);
2238 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2239 // <copy_func>, did_it);
2240 if (DidIt.isValid()) {
2241 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2242 QualType CopyprivateArrayTy = C.getConstantArrayType(
2243 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2244 /*IndexTypeQuals=*/0);
2245 // Create a list of all private variables for copyprivate.
2246 Address CopyprivateList =
2247 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2248 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2249 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2250 CGF.Builder.CreateStore(
2251 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2252 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2253 CGF.VoidPtrTy),
2254 Elem);
2255 }
2256 // Build function that copies private values from single region to all other
2257 // threads in the corresponding parallel region.
2258 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2259 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2260 SrcExprs, DstExprs, AssignmentOps, Loc);
2261 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2262 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2263 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2264 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2265 llvm::Value *Args[] = {
2266 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2267 getThreadID(CGF, Loc), // i32 <gtid>
2268 BufSize, // size_t <buf_size>
2269 CL.emitRawPointer(CGF), // void *<copyprivate list>
2270 CpyFn, // void (*) (void *, void *) <copy_func>
2271 DidItVal // i32 did_it
2272 };
2273 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2274 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2275 Args);
2276 }
2277}
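// Illustrative expansion for '#pragma omp single copyprivate(x)' (sketch):
//   int32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     body();
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   void *list[1] = {&x};
//   __kmpc_copyprivate(&loc, gtid, sizeof(list), list, copy_func, did_it);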
2278
2279void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2280 const RegionCodeGenTy &OrderedOpGen,
2281 SourceLocation Loc, bool IsThreads) {
2282 if (!CGF.HaveInsertPoint())
2283 return;
2284 // __kmpc_ordered(ident_t *, gtid);
2285 // OrderedOpGen();
2286 // __kmpc_end_ordered(ident_t *, gtid);
2287 // Prepare arguments and build a call to __kmpc_ordered
2288 if (IsThreads) {
2289 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2290 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2291 CGM.getModule(), OMPRTL___kmpc_ordered),
2292 Args,
2293 OMPBuilder.getOrCreateRuntimeFunction(
2294 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2295 Args);
2296 OrderedOpGen.setAction(Action);
2297 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2298 return;
2299 }
2300 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2301}
2302
2303static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2304 unsigned Flags;
2305 if (Kind == OMPD_for)
2306 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2307 else if (Kind == OMPD_sections)
2308 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2309 else if (Kind == OMPD_single)
2310 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2311 else if (Kind == OMPD_barrier)
2312 Flags = OMP_IDENT_BARRIER_EXPL;
2313 else
2314 Flags = OMP_IDENT_BARRIER_IMPL;
2315 return Flags;
2316}
2317
2318void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2319 CodeGenFunction &CGF, const OMPLoopDirective &S,
2320 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2321 // Check if the loop directive is actually a doacross loop directive. In
2322 // that case choose the 'static' schedule with chunk size 1.
2323 if (llvm::any_of(
2324 S.getClausesOfKind<OMPOrderedClause>(),
2325 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2326 ScheduleKind = OMPC_SCHEDULE_static;
2327 // Chunk size is 1 in this case.
2328 llvm::APInt ChunkSize(32, 1);
2329 ChunkExpr = IntegerLiteral::Create(
2330 CGF.getContext(), ChunkSize,
2331 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2332 SourceLocation());
2333 }
2334}
2335
2336void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2337 OpenMPDirectiveKind Kind, bool EmitChecks,
2338 bool ForceSimpleCall) {
2339 // Check if we should use the OMPBuilder
2340 auto *OMPRegionInfo =
2341 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2342 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2343 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2344 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2345 EmitChecks));
2346 CGF.Builder.restoreIP(AfterIP);
2347 return;
2348 }
2349
2350 if (!CGF.HaveInsertPoint())
2351 return;
2352 // Build call __kmpc_cancel_barrier(loc, thread_id);
2353 // Build call __kmpc_barrier(loc, thread_id);
2354 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2355 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2356 // thread_id);
2357 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2358 getThreadID(CGF, Loc)};
2359 if (OMPRegionInfo) {
2360 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2361 llvm::Value *Result = CGF.EmitRuntimeCall(
2362 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2363 OMPRTL___kmpc_cancel_barrier),
2364 Args);
2365 if (EmitChecks) {
2366 // if (__kmpc_cancel_barrier()) {
2367 // exit from construct;
2368 // }
2369 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2370 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2371 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2372 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2373 CGF.EmitBlock(ExitBB);
2374 // exit from construct;
2375 CodeGenFunction::JumpDest CancelDestination =
2376 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2377 CGF.EmitBranchThroughCleanup(CancelDestination);
2378 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2379 }
2380 return;
2381 }
2382 }
2383 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2384 CGM.getModule(), OMPRTL___kmpc_barrier),
2385 Args);
2386}
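// Illustrative sketch: the implicit barrier ending a '#pragma omp for' inside
// a cancellable region lowers roughly to
//   if (__kmpc_cancel_barrier(&loc/*IMPL_FOR flags*/, gtid))
//     goto cancel.exit; // leave the construct once cancellation is observed
// whereas a plain barrier is just __kmpc_barrier(&loc, gtid).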
2387
2388void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2389 Expr *ME, bool IsFatal) {
2390 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2391 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2392 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2393 // *message)
2394 llvm::Value *Args[] = {
2395 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2396 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2397 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2398 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2399 CGM.getModule(), OMPRTL___kmpc_error),
2400 Args);
2401}
2402
2403/// Map the OpenMP loop schedule to the runtime enumeration.
2404static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2405 bool Chunked, bool Ordered) {
2406 switch (ScheduleKind) {
2407 case OMPC_SCHEDULE_static:
2408 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2409 : (Ordered ? OMP_ord_static : OMP_sch_static);
2410 case OMPC_SCHEDULE_dynamic:
2411 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2412 case OMPC_SCHEDULE_guided:
2413 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2414 case OMPC_SCHEDULE_runtime:
2415 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2416 case OMPC_SCHEDULE_auto:
2417 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2418 case OMPC_SCHEDULE_unknown:
2419 assert(!Chunked && "chunk was specified but schedule kind not known");
2420 return Ordered ? OMP_ord_static : OMP_sch_static;
2421 }
2422 llvm_unreachable("Unexpected runtime schedule");
2423}
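// For example (sketch): schedule(dynamic, 4) maps to OMP_sch_dynamic_chunked,
// schedule(static) without a chunk maps to OMP_sch_static, and adding the
// 'ordered' clause selects the corresponding OMP_ord_* value instead.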
2424
2425/// Map the OpenMP distribute schedule to the runtime enumeration.
2426static OpenMPSchedType
2427getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2428 // only static is allowed for dist_schedule
2429 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2430}
2431
2432bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2433 bool Chunked) const {
2434 OpenMPSchedType Schedule =
2435 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2436 return Schedule == OMP_sch_static;
2437}
2438
2439bool CGOpenMPRuntime::isStaticNonchunked(
2440 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2441 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2442 return Schedule == OMP_dist_sch_static;
2443}
2444
2445bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2446 bool Chunked) const {
2447 OpenMPSchedType Schedule =
2448 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2449 return Schedule == OMP_sch_static_chunked;
2450}
2451
2452bool CGOpenMPRuntime::isStaticChunked(
2453 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2454 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2455 return Schedule == OMP_dist_sch_static_chunked;
2456}
2457
2458bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2459 OpenMPSchedType Schedule =
2460 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2461 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2462 return Schedule != OMP_sch_static;
2463}
2464
2465static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2466 OpenMPScheduleClauseModifier M1,
2467 OpenMPScheduleClauseModifier M2) {
2468 int Modifier = 0;
2469 switch (M1) {
2470 case OMPC_SCHEDULE_MODIFIER_monotonic:
2471 Modifier = OMP_sch_modifier_monotonic;
2472 break;
2473 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2474 Modifier = OMP_sch_modifier_nonmonotonic;
2475 break;
2476 case OMPC_SCHEDULE_MODIFIER_simd:
2477 if (Schedule == OMP_sch_static_chunked)
2478 Schedule = OMP_sch_static_balanced_chunked;
2479 break;
2480 case OMPC_SCHEDULE_MODIFIER_last:
2481 case OMPC_SCHEDULE_MODIFIER_unknown:
2482 break;
2483 }
2484 switch (M2) {
2485 case OMPC_SCHEDULE_MODIFIER_monotonic:
2486 Modifier = OMP_sch_modifier_monotonic;
2487 break;
2488 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2489 Modifier = OMP_sch_modifier_nonmonotonic;
2490 break;
2491 case OMPC_SCHEDULE_MODIFIER_simd:
2492 if (Schedule == OMP_sch_static_chunked)
2493 Schedule = OMP_sch_static_balanced_chunked;
2494 break;
2495 case OMPC_SCHEDULE_MODIFIER_last:
2496 case OMPC_SCHEDULE_MODIFIER_unknown:
2497 break;
2498 }
2499 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2500 // If the static schedule kind is specified or if the ordered clause is
2501 // specified, and if the nonmonotonic modifier is not specified, the effect is
2502 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2503 // modifier is specified, the effect is as if the nonmonotonic modifier is
2504 // specified.
2505 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2506 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2507 Schedule == OMP_sch_static_balanced_chunked ||
2508 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2509 Schedule == OMP_dist_sch_static_chunked ||
2510 Schedule == OMP_dist_sch_static))
2511 Modifier = OMP_sch_modifier_nonmonotonic;
2512 }
2513 return Schedule | Modifier;
2514}
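// For example (sketch): under OpenMP 5.0, plain schedule(dynamic) carries no
// explicit modifier, so the code above returns
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// while schedule(static) is returned unmodified, since the rule quoted above
// treats static (and ordered) schedules as monotonic.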
2515
2516void CGOpenMPRuntime::emitForDispatchInit(
2517 CodeGenFunction &CGF, SourceLocation Loc,
2518 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2519 bool Ordered, const DispatchRTInput &DispatchValues) {
2520 if (!CGF.HaveInsertPoint())
2521 return;
2522 OpenMPSchedType Schedule = getRuntimeSchedule(
2523 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2524 assert(Ordered ||
2525 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2526 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2527 Schedule != OMP_sch_static_balanced_chunked));
2528 // Call __kmpc_dispatch_init(
2529 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2530 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2531 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2532
2533 // If the chunk was not specified in the clause, use the default value 1.
2534 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2535 : CGF.Builder.getIntN(IVSize, 1);
2536 llvm::Value *Args[] = {
2537 emitUpdateLocation(CGF, Loc),
2538 getThreadID(CGF, Loc),
2539 CGF.Builder.getInt32(addMonoNonMonoModifier(
2540 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2541 DispatchValues.LB, // Lower
2542 DispatchValues.UB, // Upper
2543 CGF.Builder.getIntN(IVSize, 1), // Stride
2544 Chunk // Chunk
2545 };
2546 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2547 Args);
2548}
2549
2550void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2551 SourceLocation Loc) {
2552 if (!CGF.HaveInsertPoint())
2553 return;
2554 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2555 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2556 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2557}
2558
2559static void emitForStaticInitCall(
2560 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2561 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2562 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2563 const CGOpenMPRuntime::StaticRTInput &Values) {
2564 if (!CGF.HaveInsertPoint())
2565 return;
2566
2567 assert(!Values.Ordered);
2568 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2569 Schedule == OMP_sch_static_balanced_chunked ||
2570 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2571 Schedule == OMP_dist_sch_static ||
2572 Schedule == OMP_dist_sch_static_chunked);
2573
2574 // Call __kmpc_for_static_init(
2575 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2576 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2577 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2578 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2579 llvm::Value *Chunk = Values.Chunk;
2580 if (Chunk == nullptr) {
2581 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2582 Schedule == OMP_dist_sch_static) &&
2583 "expected static non-chunked schedule");
2584 // If the chunk was not specified in the clause, use the default value 1.
2585 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2586 } else {
2587 assert((Schedule == OMP_sch_static_chunked ||
2588 Schedule == OMP_sch_static_balanced_chunked ||
2589 Schedule == OMP_ord_static_chunked ||
2590 Schedule == OMP_dist_sch_static_chunked) &&
2591 "expected static chunked schedule");
2592 }
2593 llvm::Value *Args[] = {
2594 UpdateLocation,
2595 ThreadId,
2596 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2597 M2)), // Schedule type
2598 Values.IL.emitRawPointer(CGF), // &isLastIter
2599 Values.LB.emitRawPointer(CGF), // &LB
2600 Values.UB.emitRawPointer(CGF), // &UB
2601 Values.ST.emitRawPointer(CGF), // &Stride
2602 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2603 Chunk // Chunk
2604 };
2605 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2606}
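// Illustrative call for a signed 32-bit IV with schedule(static) (sketch):
//   __kmpc_for_static_init_4(&loc, gtid, OMP_sch_static, &is_last, &lb, &ub,
//                            &stride, /*incr=*/1, /*chunk=*/1);
// after which each thread runs its [lb, ub] slice and eventually calls
// __kmpc_for_static_fini(&loc, gtid).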
2607
2608void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2609 SourceLocation Loc,
2610 OpenMPDirectiveKind DKind,
2611 const OpenMPScheduleTy &ScheduleKind,
2612 const StaticRTInput &Values) {
2613 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2614 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2615 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2616 "Expected loop-based or sections-based directive.");
2617 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2618 isOpenMPLoopDirective(DKind)
2619 ? OMP_IDENT_WORK_LOOP
2620 : OMP_IDENT_WORK_SECTIONS);
2621 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2622 llvm::FunctionCallee StaticInitFunction =
2623 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2624 false);
2625 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2626 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2627 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2628}
2629
2630void CGOpenMPRuntime::emitDistributeStaticInit(
2631 CodeGenFunction &CGF, SourceLocation Loc,
2632 OpenMPDistScheduleClauseKind SchedKind,
2633 const CGOpenMPRuntime::StaticRTInput &Values) {
2634 OpenMPSchedType ScheduleNum =
2635 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2636 llvm::Value *UpdatedLocation =
2637 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2638 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2639 llvm::FunctionCallee StaticInitFunction;
2640 bool isGPUDistribute =
2641 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2642 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2643 Values.IVSize, Values.IVSigned, isGPUDistribute);
2644
2645 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2646 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2647 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2648}
2649
2650void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2651 SourceLocation Loc,
2652 OpenMPDirectiveKind DKind) {
2653 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2654 DKind == OMPD_sections) &&
2655 "Expected distribute, for, or sections directive kind");
2656 if (!CGF.HaveInsertPoint())
2657 return;
2658 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2659 llvm::Value *Args[] = {
2660 emitUpdateLocation(CGF, Loc,
2661 isOpenMPDistributeDirective(DKind) ||
2662 (DKind == OMPD_target_teams_loop)
2663 ? OMP_IDENT_WORK_DISTRIBUTE
2664 : isOpenMPLoopDirective(DKind)
2665 ? OMP_IDENT_WORK_LOOP
2666 : OMP_IDENT_WORK_SECTIONS),
2667 getThreadID(CGF, Loc)};
2668 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2669 if (isOpenMPDistributeDirective(DKind) &&
2670 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2671 CGF.EmitRuntimeCall(
2672 OMPBuilder.getOrCreateRuntimeFunction(
2673 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2674 Args);
2675 else
2676 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2677 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2678 Args);
2679}
2680
2681void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2682 SourceLocation Loc,
2683 unsigned IVSize,
2684 bool IVSigned) {
2685 if (!CGF.HaveInsertPoint())
2686 return;
2687 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2688 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2689 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2690 Args);
2691}
2692
2693llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2694 SourceLocation Loc, unsigned IVSize,
2695 bool IVSigned, Address IL,
2696 Address LB, Address UB,
2697 Address ST) {
2698 // Call __kmpc_dispatch_next(
2699 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2700 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2701 // kmp_int[32|64] *p_stride);
2702 llvm::Value *Args[] = {
2703 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2704 IL.emitRawPointer(CGF), // &isLastIter
2705 LB.emitRawPointer(CGF), // &Lower
2706 UB.emitRawPointer(CGF), // &Upper
2707 ST.emitRawPointer(CGF) // &Stride
2708 };
2709 llvm::Value *Call = CGF.EmitRuntimeCall(
2710 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2711 return CGF.EmitScalarConversion(
2712 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2713 CGF.getContext().BoolTy, Loc);
2714}
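// Illustrative use of the dynamic dispatch protocol (sketch):
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, stride, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &is_last, &lo, &hi, &st))
//     for (int i = lo; i <= hi; i += st)
//       body(i);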
2715
2716llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2717 const Expr *Message,
2718 SourceLocation Loc) {
2719 if (!Message)
2720 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2721 return CGF.EmitScalarExpr(Message);
2722}
2723
2724llvm::Value *
2725CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity,
2726 SourceLocation Loc) {
2727 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2728 // as if sev-level is fatal."
2729 return llvm::ConstantInt::get(CGM.Int32Ty,
2730 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2731}
2732
2733void CGOpenMPRuntime::emitNumThreadsClause(
2734 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2735 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
2736 SourceLocation SeverityLoc, const Expr *Message,
2737 SourceLocation MessageLoc) {
2738 if (!CGF.HaveInsertPoint())
2739 return;
2740 llvm::SmallVector<llvm::Value *, 4> Args(
2741 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2742 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2743 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2744 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2745 // message) if the strict modifier is used.
2746 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2747 if (Modifier == OMPC_NUMTHREADS_strict) {
2748 FnID = OMPRTL___kmpc_push_num_threads_strict;
2749 Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2750 Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
2751 }
2752 CGF.EmitRuntimeCall(
2753 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2754}
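// For example (sketch): '#pragma omp parallel num_threads(strict: n)
// severity(warning) message("msg")' lowers to
//   __kmpc_push_num_threads_strict(&loc, gtid, n, /*severity=*/1, "msg");
// while plain num_threads(n) uses __kmpc_push_num_threads(&loc, gtid, n).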
2755
2756void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2757 ProcBindKind ProcBind,
2758 SourceLocation Loc) {
2759 if (!CGF.HaveInsertPoint())
2760 return;
2761 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2762 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2763 llvm::Value *Args[] = {
2764 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2765 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2766 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2767 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2768 Args);
2769}
2770
2771void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2772 SourceLocation Loc, llvm::AtomicOrdering AO) {
2773 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2774 OMPBuilder.createFlush(CGF.Builder);
2775 } else {
2776 if (!CGF.HaveInsertPoint())
2777 return;
2778 // Build call void __kmpc_flush(ident_t *loc)
2779 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2780 CGM.getModule(), OMPRTL___kmpc_flush),
2781 emitUpdateLocation(CGF, Loc));
2782 }
2783}
2784
2785namespace {
2786/// Indexes of fields for type kmp_task_t.
2787enum KmpTaskTFields {
2788 /// List of shared variables.
2789 KmpTaskTShareds,
2790 /// Task routine.
2791 KmpTaskTRoutine,
2792 /// Partition id for the untied tasks.
2793 KmpTaskTPartId,
2794 /// Function that calls destructors for private variables.
2795 Data1,
2796 /// Task priority.
2797 Data2,
2798 /// (Taskloops only) Lower bound.
2799 KmpTaskTLowerBound,
2800 /// (Taskloops only) Upper bound.
2801 KmpTaskTUpperBound,
2802 /// (Taskloops only) Stride.
2803 KmpTaskTStride,
2804 /// (Taskloops only) Is last iteration flag.
2805 KmpTaskTLastIter,
2806 /// (Taskloops only) Reduction data.
2807 KmpTaskTReductions,
2808};
2809} // anonymous namespace
2810
2811void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2812 // If we are in simd mode or there are no entries, we don't need to do
2813 // anything.
2814 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2815 return;
2816
2817 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2818 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2819 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2820 SourceLocation Loc;
2821 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2822 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2823 E = CGM.getContext().getSourceManager().fileinfo_end();
2824 I != E; ++I) {
2825 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2826 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2827 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2828 I->getFirst(), EntryInfo.Line, 1);
2829 break;
2830 }
2831 }
2832 }
2833 switch (Kind) {
2834 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2835 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2836 DiagnosticsEngine::Error, "Offloading entry for target region in "
2837 "%0 is incorrect: either the "
2838 "address or the ID is invalid.");
2839 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2840 } break;
2841 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2842 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2843 DiagnosticsEngine::Error, "Offloading entry for declare target "
2844 "variable %0 is incorrect: the "
2845 "address is invalid.");
2846 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2847 } break;
2848 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2849 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2850 DiagnosticsEngine::Error,
2851 "Offloading entry for declare target variable is incorrect: the "
2852 "address is invalid.");
2853 CGM.getDiags().Report(DiagID);
2854 } break;
2855 }
2856 };
2857
2858 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2859}
2860
2861void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2862 if (!KmpRoutineEntryPtrTy) {
2863 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2864 ASTContext &C = CGM.getContext();
2865 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2866 FunctionProtoType::ExtProtoInfo EPI;
2867 KmpRoutineEntryPtrQTy = C.getPointerType(
2868 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2869 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2870 }
2871}
2872
2873namespace {
2874struct PrivateHelpersTy {
2875 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2876 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2877 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2878 PrivateElemInit(PrivateElemInit) {}
2879 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2880 const Expr *OriginalRef = nullptr;
2881 const VarDecl *Original = nullptr;
2882 const VarDecl *PrivateCopy = nullptr;
2883 const VarDecl *PrivateElemInit = nullptr;
2884 bool isLocalPrivate() const {
2885 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2886 }
2887};
2888typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2889} // anonymous namespace
2890
2891static bool isAllocatableDecl(const VarDecl *VD) {
2892 const VarDecl *CVD = VD->getCanonicalDecl();
2893 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2894 return false;
2895 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2896 // Use the default allocation.
2897 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2898 !AA->getAllocator());
2899}
2900
2901static RecordDecl *
2902createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2903 if (!Privates.empty()) {
2904 ASTContext &C = CGM.getContext();
2905 // Build struct .kmp_privates_t. {
2906 // /* private vars */
2907 // };
2908 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2909 RD->startDefinition();
2910 for (const auto &Pair : Privates) {
2911 const VarDecl *VD = Pair.second.Original;
2912 QualType Type = VD->getType().getNonReferenceType();
2913 // If the private variable is a local variable with lvalue ref type,
2914 // allocate the pointer instead of the pointee type.
2915 if (Pair.second.isLocalPrivate()) {
2916 if (VD->getType()->isLValueReferenceType())
2917 Type = C.getPointerType(Type);
2918 if (isAllocatableDecl(VD))
2919 Type = C.getPointerType(Type);
2920 }
2921 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2922 if (VD->hasAttrs()) {
2923 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2924 E(VD->getAttrs().end());
2925 I != E; ++I)
2926 FD->addAttr(*I);
2927 }
2928 }
2929 RD->completeDefinition();
2930 return RD;
2931 }
2932 return nullptr;
2933}
2934
2935static RecordDecl *
2936createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2937 QualType KmpInt32Ty,
2938 QualType KmpRoutineEntryPointerQTy) {
2939 ASTContext &C = CGM.getContext();
2940 // Build struct kmp_task_t {
2941 // void * shareds;
2942 // kmp_routine_entry_t routine;
2943 // kmp_int32 part_id;
2944 // kmp_cmplrdata_t data1;
2945 // kmp_cmplrdata_t data2;
2946 // For taskloops additional fields:
2947 // kmp_uint64 lb;
2948 // kmp_uint64 ub;
2949 // kmp_int64 st;
2950 // kmp_int32 liter;
2951 // void * reductions;
2952 // };
2953 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2954 UD->startDefinition();
2955 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2956 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2957 UD->completeDefinition();
2958 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
2959 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2960 RD->startDefinition();
2961 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2962 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2963 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2964 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2965 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2966 if (isOpenMPTaskLoopDirective(Kind)) {
2967 QualType KmpUInt64Ty =
2968 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2969 QualType KmpInt64Ty =
2970 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2971 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2972 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2973 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2974 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2975 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2976 }
2977 RD->completeDefinition();
2978 return RD;
2979}
2980
2981static RecordDecl *
2982createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2983 ArrayRef<PrivateDataTy> Privates) {
2984 ASTContext &C = CGM.getContext();
2985 // Build struct kmp_task_t_with_privates {
2986 // kmp_task_t task_data;
2987 // .kmp_privates_t. privates;
2988 // };
2989 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2990 RD->startDefinition();
2991 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2992 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2993 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
2994 RD->completeDefinition();
2995 return RD;
2996}
2997
2998/// Emit a proxy function which accepts kmp_task_t as the second
2999/// argument.
3000/// \code
3001/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3002/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3003/// For taskloops:
3004/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3005/// tt->reductions, tt->shareds);
3006/// return 0;
3007/// }
3008/// \endcode
3009static llvm::Function *
3010emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3011 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3012 QualType KmpTaskTWithPrivatesPtrQTy,
3013 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3014 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3015 llvm::Value *TaskPrivatesMap) {
3016 ASTContext &C = CGM.getContext();
3017 FunctionArgList Args;
3018 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3019 ImplicitParamKind::Other);
3020 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3021 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3022 ImplicitParamKind::Other);
3023 Args.push_back(&GtidArg);
3024 Args.push_back(&TaskTypeArg);
3025 const auto &TaskEntryFnInfo =
3026 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3027 llvm::FunctionType *TaskEntryTy =
3028 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3029 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3030 auto *TaskEntry = llvm::Function::Create(
3031 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3032 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3033 TaskEntry->setDoesNotRecurse();
3034 CodeGenFunction CGF(CGM);
3035 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3036 Loc, Loc);
3037
3038 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3039 // tt,
3040 // For taskloops:
3041 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3042 // tt->task_data.shareds);
3043 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3044 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3045 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3046 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3047 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3048 const auto *KmpTaskTWithPrivatesQTyRD =
3049 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3050 LValue Base =
3051 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3052 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3053 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3054 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3055 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3056
3057 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3058 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3059 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3060 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3061 CGF.ConvertTypeForMem(SharedsPtrTy));
3062
3063 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3064 llvm::Value *PrivatesParam;
3065 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3066 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3067 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3068 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3069 } else {
3070 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3071 }
3072
3073 llvm::Value *CommonArgs[] = {
3074 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3075 CGF.Builder
3076 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3077 CGF.VoidPtrTy, CGF.Int8Ty)
3078 .emitRawPointer(CGF)};
3079 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3080 std::end(CommonArgs));
3081 if (isOpenMPTaskLoopDirective(Kind)) {
3082 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3083 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3084 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3085 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3086 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3087 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3088 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3089 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3090 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3091 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3092 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3093 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3094 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3095 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3096 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3097 CallArgs.push_back(LBParam);
3098 CallArgs.push_back(UBParam);
3099 CallArgs.push_back(StParam);
3100 CallArgs.push_back(LIParam);
3101 CallArgs.push_back(RParam);
3102 }
3103 CallArgs.push_back(SharedsParam);
3104
3105 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3106 CallArgs);
3107 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3108 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3109 CGF.FinishFunction();
3110 return TaskEntry;
3111}
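// Usage sketch: __kmpc_omp_task_alloc() receives this proxy as the task's
// routine entry, so a later __kmpc_omp_task(loc, gtid, new_task) in the
// runtime comes down to roughly:
// \code
// kmp_int32 res = (*new_task->routine)(gtid, new_task); // .omp_task_entry.
// \endcode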
3112
3113static llvm::Function *emitDestructorsFunction(CodeGenModule &CGM,
3114 SourceLocation Loc,
3115 QualType KmpInt32Ty,
3116 QualType KmpTaskTWithPrivatesPtrQTy,
3117 QualType KmpTaskTWithPrivatesQTy) {
3118 ASTContext &C = CGM.getContext();
3119 FunctionArgList Args;
3120 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3121 ImplicitParamKind::Other);
3122 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3123 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3124 ImplicitParamKind::Other);
3125 Args.push_back(&GtidArg);
3126 Args.push_back(&TaskTypeArg);
3127 const auto &DestructorFnInfo =
3128 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3129 llvm::FunctionType *DestructorFnTy =
3130 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3131 std::string Name =
3132 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3133 auto *DestructorFn =
3134 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3135 Name, &CGM.getModule());
3136 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3137 DestructorFnInfo);
3138 DestructorFn->setDoesNotRecurse();
3139 CodeGenFunction CGF(CGM);
3140 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3141 Args, Loc, Loc);
3142
3143 LValue Base = CGF.EmitLoadOfPointerLValue(
3144 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3145 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3146 const auto *KmpTaskTWithPrivatesQTyRD =
3147 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3148 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3149 Base = CGF.EmitLValueForField(Base, *FI);
3150 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3151 if (QualType::DestructionKind DtorKind =
3152 Field->getType().isDestructedType()) {
3153 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3154 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3155 }
3156 }
3157 CGF.FinishFunction();
3158 return DestructorFn;
3159}
3160
3161/// Emit a privates mapping function for correct handling of private and
3162/// firstprivate variables.
3163/// \code
3164/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3165/// **noalias priv1,..., <tyn> **noalias privn) {
3166/// *priv1 = &.privates.priv1;
3167/// ...;
3168/// *privn = &.privates.privn;
3169/// }
3170/// \endcode
3171static llvm::Value *
3172emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3173 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3174 ArrayRef<PrivateDataTy> Privates) {
3175 ASTContext &C = CGM.getContext();
3176 FunctionArgList Args;
3177 ImplicitParamDecl TaskPrivatesArg(
3178 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3179 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3180 ImplicitParamKind::Other);
3181 Args.push_back(&TaskPrivatesArg);
3182 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3183 unsigned Counter = 1;
3184 for (const Expr *E : Data.PrivateVars) {
3185 Args.push_back(ImplicitParamDecl::Create(
3186 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3187 C.getPointerType(C.getPointerType(E->getType()))
3188 .withConst()
3189 .withRestrict(),
3190 ImplicitParamKind::Other));
3191 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3192 PrivateVarsPos[VD] = Counter;
3193 ++Counter;
3194 }
3195 for (const Expr *E : Data.FirstprivateVars) {
3196 Args.push_back(ImplicitParamDecl::Create(
3197 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3198 C.getPointerType(C.getPointerType(E->getType()))
3199 .withConst()
3200 .withRestrict(),
3201 ImplicitParamKind::Other));
3202 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3203 PrivateVarsPos[VD] = Counter;
3204 ++Counter;
3205 }
3206 for (const Expr *E : Data.LastprivateVars) {
3207 Args.push_back(ImplicitParamDecl::Create(
3208 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3209 C.getPointerType(C.getPointerType(E->getType()))
3210 .withConst()
3211 .withRestrict(),
3212 ImplicitParamKind::Other));
3213 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3214 PrivateVarsPos[VD] = Counter;
3215 ++Counter;
3216 }
3217 for (const VarDecl *VD : Data.PrivateLocals) {
3218 QualType Ty = VD->getType().getNonReferenceType();
3219 if (VD->getType()->isLValueReferenceType())
3220 Ty = C.getPointerType(Ty);
3221 if (isAllocatableDecl(VD))
3222 Ty = C.getPointerType(Ty);
3223 Args.push_back(ImplicitParamDecl::Create(
3224 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3225 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3226 ImplicitParamKind::Other));
3227 PrivateVarsPos[VD] = Counter;
3228 ++Counter;
3229 }
3230 const auto &TaskPrivatesMapFnInfo =
3231 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3232 llvm::FunctionType *TaskPrivatesMapTy =
3233 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3234 std::string Name =
3235 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3236 auto *TaskPrivatesMap = llvm::Function::Create(
3237 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3238 &CGM.getModule());
3239 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3240 TaskPrivatesMapFnInfo);
3241 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3242 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3243 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3244 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3245 }
3246 CodeGenFunction CGF(CGM);
3247 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3248 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3249
3250 // *privi = &.privates.privi;
3251 LValue Base = CGF.EmitLoadOfPointerLValue(
3252 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3253 TaskPrivatesArg.getType()->castAs<PointerType>());
3254 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3255 Counter = 0;
3256 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3257 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3258 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3259 LValue RefLVal =
3260 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3261 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3262 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3263 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3264 ++Counter;
3265 }
3266 CGF.FinishFunction();
3267 return TaskPrivatesMap;
3268}
3269
3270/// Emit initialization for private variables in task-based directives.
3271static void emitPrivatesInit(CodeGenFunction &CGF,
3272 const OMPExecutableDirective &D,
3273 Address KmpTaskSharedsPtr, LValue TDBase,
3274 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3275 QualType SharedsTy, QualType SharedsPtrTy,
3276 const OMPTaskDataTy &Data,
3277 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3278 ASTContext &C = CGF.getContext();
3279 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3280 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3281 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3282 ? OMPD_taskloop
3283 : OMPD_task;
3284 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3285 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3286 LValue SrcBase;
3287 bool IsTargetTask =
3288 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3289 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3290 // For target-based directives, skip the 4 firstprivate arrays BasePointersArray,
3291 // PointersArray, SizesArray, and MappersArray. The original variables for
3292 // these arrays are not captured and we get their addresses explicitly.
3293 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3294 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3295 SrcBase = CGF.MakeAddrLValue(
3296 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3297 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3298 CGF.ConvertTypeForMem(SharedsTy)),
3299 SharedsTy);
3300 }
3301 FI = FI->getType()->castAsRecordDecl()->field_begin();
3302 for (const PrivateDataTy &Pair : Privates) {
3303 // Do not initialize private locals.
3304 if (Pair.second.isLocalPrivate()) {
3305 ++FI;
3306 continue;
3307 }
3308 const VarDecl *VD = Pair.second.PrivateCopy;
3309 const Expr *Init = VD->getAnyInitializer();
3310 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3311 !CGF.isTrivialInitializer(Init)))) {
3312 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3313 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3314 const VarDecl *OriginalVD = Pair.second.Original;
3315 // Check if the variable is the target-based BasePointersArray,
3316 // PointersArray, SizesArray, or MappersArray.
3317 LValue SharedRefLValue;
3318 QualType Type = PrivateLValue.getType();
3319 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3320 if (IsTargetTask && !SharedField) {
3321 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3322 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3323 cast<CapturedDecl>(OriginalVD->getDeclContext())
3324 ->getNumParams() == 0 &&
3325 isa<TranslationUnitDecl>(
3326 cast<CapturedDecl>(OriginalVD->getDeclContext())
3327 ->getDeclContext()) &&
3328 "Expected artificial target data variable.");
3329 SharedRefLValue =
3330 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3331 } else if (ForDup) {
3332 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3333 SharedRefLValue = CGF.MakeAddrLValue(
3334 SharedRefLValue.getAddress().withAlignment(
3335 C.getDeclAlign(OriginalVD)),
3336 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3337 SharedRefLValue.getTBAAInfo());
3338 } else if (CGF.LambdaCaptureFields.count(
3339 Pair.second.Original->getCanonicalDecl()) > 0 ||
3340 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3341 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3342 } else {
3343 // Processing for implicitly captured variables.
3344 InlinedOpenMPRegionRAII Region(
3345 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3346 /*HasCancel=*/false, /*NoInheritance=*/true);
3347 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3348 }
3349 if (Type->isArrayType()) {
3350 // Initialize firstprivate array.
3351 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3352 // Perform simple memcpy.
3353 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3354 } else {
3355 // Initialize firstprivate array using element-by-element
3356 // initialization.
3357 CGF.EmitOMPAggregateAssign(
3358 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3359 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3360 Address SrcElement) {
3361 // Clean up any temporaries needed by the initialization.
3362 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3363 InitScope.addPrivate(Elem, SrcElement);
3364 (void)InitScope.Privatize();
3365 // Emit initialization for single element.
3366 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3367 CGF, &CapturesInfo);
3368 CGF.EmitAnyExprToMem(Init, DestElement,
3369 Init->getType().getQualifiers(),
3370 /*IsInitializer=*/false);
3371 });
3372 }
3373 } else {
3374 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3375 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3376 (void)InitScope.Privatize();
3377 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3378 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3379 /*capturedByInit=*/false);
3380 }
3381 } else {
3382 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3383 }
3384 }
3385 ++FI;
3386 }
3387}
3388
3389/// Check if duplication function is required for taskloops.
3390static bool checkInitIsRequired(CodeGenFunction &CGF,
3391 ArrayRef<PrivateDataTy> Privates) {
3392 bool InitRequired = false;
3393 for (const PrivateDataTy &Pair : Privates) {
3394 if (Pair.second.isLocalPrivate())
3395 continue;
3396 const VarDecl *VD = Pair.second.PrivateCopy;
3397 const Expr *Init = VD->getAnyInitializer();
3398 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3399 !CGF.isTrivialInitializer(Init));
3400 if (InitRequired)
3401 break;
3402 }
3403 return InitRequired;
3404}
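// For example, 'firstprivate(std::vector<int> V)' on a taskloop makes this
// return true (the copy constructor must run for every generated task), while
// a plain 'firstprivate(int I)' does not by itself require a task_dup
// function.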
3405
3406
3407/// Emit task_dup function (for initialization of
3408/// private/firstprivate/lastprivate vars and last_iter flag)
3409/// \code
3410/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3411/// lastpriv) {
3412/// // setup lastprivate flag
3413/// task_dst->last = lastpriv;
3414/// // could be constructor calls here...
3415/// }
3416/// \endcode
3417static llvm::Value *
3418emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3419 const OMPExecutableDirective &D,
3420 QualType KmpTaskTWithPrivatesPtrQTy,
3421 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3422 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3423 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3424 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3425 ASTContext &C = CGM.getContext();
3426 FunctionArgList Args;
3427 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3428 KmpTaskTWithPrivatesPtrQTy,
3429 ImplicitParamKind::Other);
3430 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3431 KmpTaskTWithPrivatesPtrQTy,
3432 ImplicitParamKind::Other);
3433 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3434 ImplicitParamKind::Other);
3435 Args.push_back(&DstArg);
3436 Args.push_back(&SrcArg);
3437 Args.push_back(&LastprivArg);
3438 const auto &TaskDupFnInfo =
3439 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3440 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3441 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3442 auto *TaskDup = llvm::Function::Create(
3443 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3444 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3445 TaskDup->setDoesNotRecurse();
3446 CodeGenFunction CGF(CGM);
3447 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3448 Loc);
3449
3450 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3451 CGF.GetAddrOfLocalVar(&DstArg),
3452 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3453 // task_dst->liter = lastpriv;
3454 if (WithLastIter) {
3455 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3456 LValue Base = CGF.EmitLValueForField(
3457 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3458 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3459 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3460 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3461 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3462 }
3463
3464 // Emit initial values for private copies (if any).
3465 assert(!Privates.empty());
3466 Address KmpTaskSharedsPtr = Address::invalid();
3467 if (!Data.FirstprivateVars.empty()) {
3468 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3469 CGF.GetAddrOfLocalVar(&SrcArg),
3470 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3471 LValue Base = CGF.EmitLValueForField(
3472 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3473 KmpTaskSharedsPtr = Address(
3474 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3475 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3476 KmpTaskTShareds)),
3477 Loc),
3478 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3479 }
3480 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3481 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3482 CGF.FinishFunction();
3483 return TaskDup;
3484}
3485
3486/// Checks if destructor function is required to be generated.
3487/// \return true if cleanups are required, false otherwise.
3488static bool
3489checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3490 ArrayRef<PrivateDataTy> Privates) {
3491 for (const PrivateDataTy &P : Privates) {
3492 if (P.second.isLocalPrivate())
3493 continue;
3494 QualType Ty = P.second.Original->getType().getNonReferenceType();
3495 if (Ty.isDestructedType())
3496 return true;
3497 }
3498 return false;
3499}
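// For example, 'firstprivate(std::string S)' on a task yields a private copy
// with a non-trivial destructor, so this returns true; emitTaskInit() then
// sets DestructorsFlag and the runtime invokes the emitted destructor thunk
// once the task body finishes.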
3500
3501namespace {
3502/// Loop generator for OpenMP iterator expression.
3503class OMPIteratorGeneratorScope final
3504 : public CodeGenFunction::OMPPrivateScope {
3505 CodeGenFunction &CGF;
3506 const OMPIteratorExpr *E = nullptr;
3507 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3508 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3509 OMPIteratorGeneratorScope() = delete;
3510 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3511
3512public:
3513 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3514 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3515 if (!E)
3516 return;
3517 SmallVector<llvm::Value *, 4> Uppers;
3518 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3519 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3520 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3521 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3522 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3523 addPrivate(
3524 HelperData.CounterVD,
3525 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3526 }
3527 Privatize();
3528
3529 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3530 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3531 LValue CLVal =
3532 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3533 HelperData.CounterVD->getType());
3534 // Counter = 0;
3535 CGF.EmitStoreOfScalar(
3536 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3537 CLVal);
3538 CodeGenFunction::JumpDest &ContDest =
3539 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3540 CodeGenFunction::JumpDest &ExitDest =
3541 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3542 // N = <number-of-iterations>;
3543 llvm::Value *N = Uppers[I];
3544 // cont:
3545 // if (Counter < N) goto body; else goto exit;
3546 CGF.EmitBlock(ContDest.getBlock());
3547 auto *CVal =
3548 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3549 llvm::Value *Cmp =
3550 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3551 ? CGF.Builder.CreateICmpSLT(CVal, N)
3552 : CGF.Builder.CreateICmpULT(CVal, N);
3553 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3554 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3555 // body:
3556 CGF.EmitBlock(BodyBB);
3557 // Iteri = Begini + Counter * Stepi;
3558 CGF.EmitIgnoredExpr(HelperData.Update);
3559 }
3560 }
3561 ~OMPIteratorGeneratorScope() {
3562 if (!E)
3563 return;
3564 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3565 // Counter = Counter + 1;
3566 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3567 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3568 // goto cont;
3569 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3570 // exit:
3571 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3572 }
3573 }
3574};
3575} // namespace
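// For a modifier such as 'iterator(i = 0:n)', the scope above brackets the
// enclosed emission in roughly the following structure (the constructor emits
// the loop header, the destructor the backedge and exit):
// \code
// counter = 0;
// cont: if (counter < n) goto body; else goto exit;
// body: i = begin + counter * step;
//       <enclosed emission>
//       counter = counter + 1;
//       goto cont;
// exit: ;
// \endcode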
3576
3577static std::pair<llvm::Value *, llvm::Value *>
3578getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3579 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3580 llvm::Value *Addr;
3581 if (OASE) {
3582 const Expr *Base = OASE->getBase();
3583 Addr = CGF.EmitScalarExpr(Base);
3584 } else {
3585 Addr = CGF.EmitLValue(E).getPointer(CGF);
3586 }
3587 llvm::Value *SizeVal;
3588 QualType Ty = E->getType();
3589 if (OASE) {
3590 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3591 for (const Expr *SE : OASE->getDimensions()) {
3592 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3593 Sz = CGF.EmitScalarConversion(
3594 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3595 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3596 }
3597 } else if (const auto *ASE =
3598 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3599 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3600 Address UpAddrAddress = UpAddrLVal.getAddress();
3601 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3602 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3603 /*Idx0=*/1);
3604 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3605 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3606 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3607 } else {
3608 SizeVal = CGF.getTypeSize(Ty);
3609 }
3610 return std::make_pair(Addr, SizeVal);
3611}
3612
3613/// Builds the kmp_task_affinity_info_t type, if it is not built yet, and the flags type.
3614static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3615 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3616 if (KmpTaskAffinityInfoTy.isNull()) {
3617 RecordDecl *KmpAffinityInfoRD =
3618 C.buildImplicitRecord("kmp_task_affinity_info_t");
3619 KmpAffinityInfoRD->startDefinition();
3620 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3621 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3622 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3623 KmpAffinityInfoRD->completeDefinition();
3624 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3625 }
3626}
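// The record is meant to line up with the runtime-side declaration in kmp.h;
// roughly (the runtime packs 'flags' into bitfields, so the plain 32-bit
// integer used above is an approximation of that):
// \code
// typedef struct kmp_task_affinity_info {
//   kmp_intptr_t base_addr;
//   size_t len;
//   kmp_uint32 flags;
// } kmp_task_affinity_info_t;
// \endcode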
3627
3628CGOpenMPRuntime::TaskResultTy
3629CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3630 const OMPExecutableDirective &D,
3631 llvm::Function *TaskFunction, QualType SharedsTy,
3632 Address Shareds, const OMPTaskDataTy &Data) {
3633 ASTContext &C = CGM.getContext();
3634 llvm::SmallVector<PrivateDataTy, 4> Privates;
3635 // Aggregate privates and sort them by the alignment.
3636 const auto *I = Data.PrivateCopies.begin();
3637 for (const Expr *E : Data.PrivateVars) {
3638 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3639 Privates.emplace_back(
3640 C.getDeclAlign(VD),
3641 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3642 /*PrivateElemInit=*/nullptr));
3643 ++I;
3644 }
3645 I = Data.FirstprivateCopies.begin();
3646 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3647 for (const Expr *E : Data.FirstprivateVars) {
3648 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3649 Privates.emplace_back(
3650 C.getDeclAlign(VD),
3651 PrivateHelpersTy(
3652 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3653 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3654 ++I;
3655 ++IElemInitRef;
3656 }
3657 I = Data.LastprivateCopies.begin();
3658 for (const Expr *E : Data.LastprivateVars) {
3659 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3660 Privates.emplace_back(
3661 C.getDeclAlign(VD),
3662 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3663 /*PrivateElemInit=*/nullptr));
3664 ++I;
3665 }
3666 for (const VarDecl *VD : Data.PrivateLocals) {
3667 if (isAllocatableDecl(VD))
3668 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3669 else
3670 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3671 }
3672 llvm::stable_sort(Privates,
3673 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3674 return L.first > R.first;
3675 });
3676 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3677 // Build type kmp_routine_entry_t (if not built yet).
3678 emitKmpRoutineEntryT(KmpInt32Ty);
3679 // Build type kmp_task_t (if not built yet).
3680 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3681 if (SavedKmpTaskloopTQTy.isNull()) {
3682 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3683 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3684 }
3685 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3686 } else {
3687 assert((D.getDirectiveKind() == OMPD_task ||
3688 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3689 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3690 "Expected taskloop, task or target directive");
3691 if (SavedKmpTaskTQTy.isNull()) {
3692 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3693 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3694 }
3695 KmpTaskTQTy = SavedKmpTaskTQTy;
3696 }
3697 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3698 // Build particular struct kmp_task_t for the given task.
3699 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3700 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3701 CanQualType KmpTaskTWithPrivatesQTy =
3702 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3703 QualType KmpTaskTWithPrivatesPtrQTy =
3704 C.getPointerType(KmpTaskTWithPrivatesQTy);
3705 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3706 llvm::Value *KmpTaskTWithPrivatesTySize =
3707 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3708 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3709
3710 // Emit initial values for private copies (if any).
3711 llvm::Value *TaskPrivatesMap = nullptr;
3712 llvm::Type *TaskPrivatesMapTy =
3713 std::next(TaskFunction->arg_begin(), 3)->getType();
3714 if (!Privates.empty()) {
3715 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3716 TaskPrivatesMap =
3717 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3718 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3719 TaskPrivatesMap, TaskPrivatesMapTy);
3720 } else {
3721 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3722 cast<llvm::PointerType>(TaskPrivatesMapTy));
3723 }
3724 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3725 // kmp_task_t *tt);
3726 llvm::Function *TaskEntry = emitProxyTaskFunction(
3727 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3728 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3729 TaskPrivatesMap);
3730
3731 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3732 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3733 // kmp_routine_entry_t *task_entry);
3734 // Task flags. Format is taken from
3735 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3736 // description of kmp_tasking_flags struct.
3737 enum {
3738 TiedFlag = 0x1,
3739 FinalFlag = 0x2,
3740 DestructorsFlag = 0x8,
3741 PriorityFlag = 0x20,
3742 DetachableFlag = 0x40,
3743 FreeAgentFlag = 0x80,
3744 };
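 // Worked example: a tied task with a 'priority' clause ends up with
 // Flags = TiedFlag | PriorityFlag = 0x21; a 'detach' clause would further set
 // DetachableFlag (0x40) below.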
3745 unsigned Flags = Data.Tied ? TiedFlag : 0;
3746 bool NeedsCleanup = false;
3747 if (!Privates.empty()) {
3748 NeedsCleanup =
3749 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3750 if (NeedsCleanup)
3751 Flags = Flags | DestructorsFlag;
3752 }
3753 if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
3754 OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
3755 if (Kind == OMPC_THREADSET_omp_pool)
3756 Flags = Flags | FreeAgentFlag;
3757 }
3758 if (Data.Priority.getInt())
3759 Flags = Flags | PriorityFlag;
3760 if (D.hasClausesOfKind<OMPDetachClause>())
3761 Flags = Flags | DetachableFlag;
3762 llvm::Value *TaskFlags =
3763 Data.Final.getPointer()
3764 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3765 CGF.Builder.getInt32(FinalFlag),
3766 CGF.Builder.getInt32(/*C=*/0))
3767 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3768 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3769 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3770 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3771 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3772 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3773 TaskEntry, KmpRoutineEntryPtrTy)};
3774 llvm::Value *NewTask;
3775 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3776 // Check if we have any device clause associated with the directive.
3777 const Expr *Device = nullptr;
3778 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3779 Device = C->getDevice();
3780 // Emit device ID if any, otherwise use the default value.
3781 llvm::Value *DeviceID;
3782 if (Device)
3783 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3784 CGF.Int64Ty, /*isSigned=*/true);
3785 else
3786 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3787 AllocArgs.push_back(DeviceID);
3788 NewTask = CGF.EmitRuntimeCall(
3789 OMPBuilder.getOrCreateRuntimeFunction(
3790 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3791 AllocArgs);
3792 } else {
3793 NewTask =
3794 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3795 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3796 AllocArgs);
3797 }
3798 // Emit detach clause initialization.
3799 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3800 // task_descriptor);
3801 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3802 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3803 LValue EvtLVal = CGF.EmitLValue(Evt);
3804
3805 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3806 // int gtid, kmp_task_t *task);
3807 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3808 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3809 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3810 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3811 OMPBuilder.getOrCreateRuntimeFunction(
3812 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3813 {Loc, Tid, NewTask});
3814 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3815 Evt->getExprLoc());
3816 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3817 }
3818 // Process affinity clauses.
3819 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3820 // Process list of affinity data.
3821 ASTContext &C = CGM.getContext();
3822 Address AffinitiesArray = Address::invalid();
3823 // Calculate number of elements to form the array of affinity data.
3824 llvm::Value *NumOfElements = nullptr;
3825 unsigned NumAffinities = 0;
3826 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3827 if (const Expr *Modifier = C->getModifier()) {
3828 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3829 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3830 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3831 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3832 NumOfElements =
3833 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3834 }
3835 } else {
3836 NumAffinities += C->varlist_size();
3837 }
3838 }
3839 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3840 // Field ids in kmp_task_affinity_info record.
3841 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3842
3843 QualType KmpTaskAffinityInfoArrayTy;
3844 if (NumOfElements) {
3845 NumOfElements = CGF.Builder.CreateNUWAdd(
3846 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3847 auto *OVE = new (C) OpaqueValueExpr(
3848 Loc,
3849 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3850 VK_PRValue);
3851 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3852 RValue::get(NumOfElements));
3853 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3854 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3855 /*IndexTypeQuals=*/0);
3856 // Properly emit variable-sized array.
3857 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3858 ImplicitParamKind::Other);
3859 CGF.EmitVarDecl(*PD);
3860 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3861 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3862 /*isSigned=*/false);
3863 } else {
3864 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3865 KmpTaskAffinityInfoTy,
3866 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3867 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3868 AffinitiesArray =
3869 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3870 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3871 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3872 /*isSigned=*/false);
3873 }
3874
3875 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3876 // Fill array by elements without iterators.
3877 unsigned Pos = 0;
3878 bool HasIterator = false;
3879 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3880 if (C->getModifier()) {
3881 HasIterator = true;
3882 continue;
3883 }
3884 for (const Expr *E : C->varlist()) {
3885 llvm::Value *Addr;
3886 llvm::Value *Size;
3887 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3888 LValue Base =
3889 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3890 KmpTaskAffinityInfoTy);
3891 // affs[i].base_addr = &<Affinities[i].second>;
3892 LValue BaseAddrLVal = CGF.EmitLValueForField(
3893 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3894 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3895 BaseAddrLVal);
3896 // affs[i].len = sizeof(<Affinities[i].second>);
3897 LValue LenLVal = CGF.EmitLValueForField(
3898 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3899 CGF.EmitStoreOfScalar(Size, LenLVal);
3900 ++Pos;
3901 }
3902 }
3903 LValue PosLVal;
3904 if (HasIterator) {
3905 PosLVal = CGF.MakeAddrLValue(
3906 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3907 C.getSizeType());
3908 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3909 }
3910 // Process elements with iterators.
3911 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3912 const Expr *Modifier = C->getModifier();
3913 if (!Modifier)
3914 continue;
3915 OMPIteratorGeneratorScope IteratorScope(
3916 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3917 for (const Expr *E : C->varlist()) {
3918 llvm::Value *Addr;
3919 llvm::Value *Size;
3920 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3921 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3922 LValue Base =
3923 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3924 KmpTaskAffinityInfoTy);
3925 // affs[i].base_addr = &<Affinities[i].second>;
3926 LValue BaseAddrLVal = CGF.EmitLValueForField(
3927 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3928 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3929 BaseAddrLVal);
3930 // affs[i].len = sizeof(<Affinities[i].second>);
3931 LValue LenLVal = CGF.EmitLValueForField(
3932 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3933 CGF.EmitStoreOfScalar(Size, LenLVal);
3934 Idx = CGF.Builder.CreateNUWAdd(
3935 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3936 CGF.EmitStoreOfScalar(Idx, PosLVal);
3937 }
3938 }
3939 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3940 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3941 // naffins, kmp_task_affinity_info_t *affin_list);
3942 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3943 llvm::Value *GTid = getThreadID(CGF, Loc);
3944 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3945 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3946 // FIXME: Emit the function and ignore its result for now unless the
3947 // runtime function is properly implemented.
3948 (void)CGF.EmitRuntimeCall(
3949 OMPBuilder.getOrCreateRuntimeFunction(
3950 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3951 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3952 }
3953 llvm::Value *NewTaskNewTaskTTy =
3954 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3955 NewTask, KmpTaskTWithPrivatesPtrTy);
3956 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3957 KmpTaskTWithPrivatesQTy);
3958 LValue TDBase =
3959 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3960 // Fill the data in the resulting kmp_task_t record.
3961 // Copy shareds if there are any.
3962 Address KmpTaskSharedsPtr = Address::invalid();
3963 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
3964 KmpTaskSharedsPtr = Address(
3965 CGF.EmitLoadOfScalar(
3966 CGF.EmitLValueForField(
3967 TDBase,
3968 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3969 Loc),
3970 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3971 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3972 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3973 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3974 }
3975 // Emit initial values for private copies (if any).
3976 TaskResultTy Result;
3977 if (!Privates.empty()) {
3978 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3979 SharedsTy, SharedsPtrTy, Data, Privates,
3980 /*ForDup=*/false);
3981 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3982 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3983 Result.TaskDupFn = emitTaskDupFunction(
3984 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3985 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3986 /*WithLastIter=*/!Data.LastprivateVars.empty());
3987 }
3988 }
3989 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3990 enum { Priority = 0, Destructors = 1 };
3991 // Provide pointer to function with destructors for privates.
3992 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3993 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
3994 assert(KmpCmplrdataUD->isUnion());
3995 if (NeedsCleanup) {
3996 llvm::Value *DestructorFn = emitDestructorsFunction(
3997 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3998 KmpTaskTWithPrivatesQTy);
3999 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4000 LValue DestructorsLV = CGF.EmitLValueForField(
4001 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4002 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4003 DestructorFn, KmpRoutineEntryPtrTy),
4004 DestructorsLV);
4005 }
4006 // Set priority.
4007 if (Data.Priority.getInt()) {
4008 LValue Data2LV = CGF.EmitLValueForField(
4009 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4010 LValue PriorityLV = CGF.EmitLValueForField(
4011 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4012 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4013 }
4014 Result.NewTask = NewTask;
4015 Result.TaskEntry = TaskEntry;
4016 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4017 Result.TDBase = TDBase;
4018 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4019 return Result;
4020}
4021
4022/// Translates internal dependency kind into the runtime kind.
4023static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4024 RTLDependenceKindTy DepKind;
4025 switch (K) {
4026 case OMPC_DEPEND_in:
4027 DepKind = RTLDependenceKindTy::DepIn;
4028 break;
4029 // Out and InOut dependencies must use the same code.
4030 case OMPC_DEPEND_out:
4031 case OMPC_DEPEND_inout:
4032 DepKind = RTLDependenceKindTy::DepInOut;
4033 break;
4034 case OMPC_DEPEND_mutexinoutset:
4035 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4036 break;
4037 case OMPC_DEPEND_inoutset:
4038 DepKind = RTLDependenceKindTy::DepInOutSet;
4039 break;
4040 case OMPC_DEPEND_outallmemory:
4041 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4042 break;
4043 case OMPC_DEPEND_source:
4044 case OMPC_DEPEND_sink:
4045 case OMPC_DEPEND_depobj:
4046 case OMPC_DEPEND_inoutallmemory:
4047 case OMPC_DEPEND_unknown:
4048 llvm_unreachable("Unknown task dependence type");
4049 }
4050 return DepKind;
4051}
4052
4053/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4054static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4055 QualType &FlagsTy) {
4056 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4057 if (KmpDependInfoTy.isNull()) {
4058 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4059 KmpDependInfoRD->startDefinition();
4060 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4061 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4062 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4063 KmpDependInfoRD->completeDefinition();
4064 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4065 }
4066}
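// This is intended to mirror the runtime's kmp_depend_info from kmp.h;
// FlagsTy above is an unsigned integer of sizeof(bool) width (typically 8
// bits) standing in for the runtime's flag bitfields:
// \code
// typedef struct kmp_depend_info {
//   kmp_intptr_t base_addr;
//   size_t len;
//   kmp_uint8 flags; // in/out/mtx/set bits in the actual runtime
// } kmp_depend_info_t;
// \endcode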
4067
4068std::pair<llvm::Value *, LValue>
4069CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4070 SourceLocation Loc) {
4071 ASTContext &C = CGM.getContext();
4072 QualType FlagsTy;
4073 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4074 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4075 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4076 LValue Base = CGF.EmitLoadOfPointerLValue(
4077 DepobjLVal.getAddress().withElementType(
4078 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4079 KmpDependInfoPtrTy->castAs<PointerType>());
4080 Address DepObjAddr = CGF.Builder.CreateGEP(
4081 CGF, Base.getAddress(),
4082 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4083 LValue NumDepsBase = CGF.MakeAddrLValue(
4084 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4085 // NumDeps = deps[i].base_addr;
4086 LValue BaseAddrLVal = CGF.EmitLValueForField(
4087 NumDepsBase,
4088 *std::next(KmpDependInfoRD->field_begin(),
4089 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4090 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4091 return std::make_pair(NumDeps, Base);
4092}
4093
4094static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4095 llvm::PointerUnion<unsigned *, LValue *> Pos,
4096 const OMPTaskDataTy::DependData &Data,
4097 Address DependenciesArray) {
4098 CodeGenModule &CGM = CGF.CGM;
4099 ASTContext &C = CGM.getContext();
4100 QualType FlagsTy;
4101 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4102 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4103 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4104
4105 OMPIteratorGeneratorScope IteratorScope(
4106 CGF, cast_or_null<OMPIteratorExpr>(
4107 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4108 : nullptr));
4109 for (const Expr *E : Data.DepExprs) {
4110 llvm::Value *Addr;
4111 llvm::Value *Size;
4112
4113 // The expression will be a nullptr in the 'omp_all_memory' case.
4114 if (E) {
4115 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4116 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4117 } else {
4118 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4119 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4120 }
4121 LValue Base;
4122 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4123 Base = CGF.MakeAddrLValue(
4124 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4125 } else {
4126 assert(E && "Expected a non-null expression");
4127 LValue &PosLVal = *cast<LValue *>(Pos);
4128 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4129 Base = CGF.MakeAddrLValue(
4130 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4131 }
4132 // deps[i].base_addr = &<Dependencies[i].second>;
4133 LValue BaseAddrLVal = CGF.EmitLValueForField(
4134 Base,
4135 *std::next(KmpDependInfoRD->field_begin(),
4136 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4137 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4138 // deps[i].len = sizeof(<Dependencies[i].second>);
4139 LValue LenLVal = CGF.EmitLValueForField(
4140 Base, *std::next(KmpDependInfoRD->field_begin(),
4141 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4142 CGF.EmitStoreOfScalar(Size, LenLVal);
4143 // deps[i].flags = <Dependencies[i].first>;
4144 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4145 LValue FlagsLVal = CGF.EmitLValueForField(
4146 Base,
4147 *std::next(KmpDependInfoRD->field_begin(),
4148 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4149 CGF.EmitStoreOfScalar(
4150 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4151 FlagsLVal);
4152 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4153 ++(*P);
4154 } else {
4155 LValue &PosLVal = *cast<LValue *>(Pos);
4156 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4157 Idx = CGF.Builder.CreateNUWAdd(Idx,
4158 llvm::ConstantInt::get(Idx->getType(), 1));
4159 CGF.EmitStoreOfScalar(Idx, PosLVal);
4160 }
4161 }
4162}
4163
4164SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4165 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4166 const OMPTaskDataTy::DependData &Data) {
4167 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4168 "Expected depobj dependency kind.");
4169 SmallVector<llvm::Value *, 4> Sizes;
4170 SmallVector<LValue, 4> SizeLVals;
4171 ASTContext &C = CGF.getContext();
4172 {
4173 OMPIteratorGeneratorScope IteratorScope(
4174 CGF, cast_or_null<OMPIteratorExpr>(
4175 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4176 : nullptr));
4177 for (const Expr *E : Data.DepExprs) {
4178 llvm::Value *NumDeps;
4179 LValue Base;
4180 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4181 std::tie(NumDeps, Base) =
4182 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4183 LValue NumLVal = CGF.MakeAddrLValue(
4184 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4185 C.getUIntPtrType());
4186 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4187 NumLVal.getAddress());
4188 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4189 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4190 CGF.EmitStoreOfScalar(Add, NumLVal);
4191 SizeLVals.push_back(NumLVal);
4192 }
4193 }
4194 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4195 llvm::Value *Size =
4196 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4197 Sizes.push_back(Size);
4198 }
4199 return Sizes;
4200}
4201
4202void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4203 QualType &KmpDependInfoTy,
4204 LValue PosLVal,
4205 const OMPTaskDataTy::DependData &Data,
4206 Address DependenciesArray) {
4207 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4208 "Expected depobj dependency kind.");
4209 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4210 {
4211 OMPIteratorGeneratorScope IteratorScope(
4212 CGF, cast_or_null<OMPIteratorExpr>(
4213 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4214 : nullptr));
4215 for (const Expr *E : Data.DepExprs) {
4216 llvm::Value *NumDeps;
4217 LValue Base;
4218 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4219 std::tie(NumDeps, Base) =
4220 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4221
4222 // Memcpy the dependency data.
4223 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4224 ElSize,
4225 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4226 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4227 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4228 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4229
4230 // Increase pos.
4231 // pos += numDeps;
4232 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4233 CGF.EmitStoreOfScalar(Add, PosLVal);
4234 }
4235 }
4236}
4237
4238std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4239 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4240 SourceLocation Loc) {
4241 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4242 return D.DepExprs.empty();
4243 }))
4244 return std::make_pair(nullptr, Address::invalid());
4245 // Process list of dependencies.
4246 ASTContext &C = CGM.getContext();
4247 Address DependenciesArray = Address::invalid();
4248 llvm::Value *NumOfElements = nullptr;
4249 unsigned NumDependencies = std::accumulate(
4250 Dependencies.begin(), Dependencies.end(), 0,
4251 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4252 return D.DepKind == OMPC_DEPEND_depobj
4253 ? V
4254 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4255 });
4256 QualType FlagsTy;
4257 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4258 bool HasDepobjDeps = false;
4259 bool HasRegularWithIterators = false;
4260 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4261 llvm::Value *NumOfRegularWithIterators =
4262 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4263 // Calculate number of depobj dependencies and regular deps with the
4264 // iterators.
4265 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4266 if (D.DepKind == OMPC_DEPEND_depobj) {
4267 SmallVector<llvm::Value *, 4> Sizes =
4268 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4269 for (llvm::Value *Size : Sizes) {
4270 NumOfDepobjElements =
4271 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4272 }
4273 HasDepobjDeps = true;
4274 continue;
4275 }
4276 // Include number of iterations, if any.
4277
4278 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4279 llvm::Value *ClauseIteratorSpace =
4280 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4281 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4282 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4283 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4284 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4285 }
4286 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4287 ClauseIteratorSpace,
4288 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4289 NumOfRegularWithIterators =
4290 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4291 HasRegularWithIterators = true;
4292 continue;
4293 }
4294 }
4295
4296 QualType KmpDependInfoArrayTy;
4297 if (HasDepobjDeps || HasRegularWithIterators) {
4298 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4299 /*isSigned=*/false);
4300 if (HasDepobjDeps) {
4301 NumOfElements =
4302 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4303 }
4304 if (HasRegularWithIterators) {
4305 NumOfElements =
4306 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4307 }
4308 auto *OVE = new (C) OpaqueValueExpr(
4309 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4310 VK_PRValue);
4311 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4312 RValue::get(NumOfElements));
4313 KmpDependInfoArrayTy =
4314 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4315 /*IndexTypeQuals=*/0);
4316 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4317 // Properly emit variable-sized array.
4318 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4319 ImplicitParamKind::Other);
4320 CGF.EmitVarDecl(*PD);
4321 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4322 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4323 /*isSigned=*/false);
4324 } else {
4325 KmpDependInfoArrayTy = C.getConstantArrayType(
4326 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4327 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4328 DependenciesArray =
4329 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4330 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4331 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4332 /*isSigned=*/false);
4333 }
4334 unsigned Pos = 0;
4335 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4336 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4337 continue;
4338 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4339 }
4340 // Copy regular dependencies with iterators.
4341 LValue PosLVal = CGF.MakeAddrLValue(
4342 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4343 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4344 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4345 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4346 continue;
4347 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4348 }
4349 // Copy final depobj arrays without iterators.
4350 if (HasDepobjDeps) {
4351 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4352 if (Dep.DepKind != OMPC_DEPEND_depobj)
4353 continue;
4354 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4355 }
4356 }
4357 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4358 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4359 return std::make_pair(NumOfElements, DependenciesArray);
4360}
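// Counting sketch for the sizing logic above: with
// 'depend(in: x, y) depend(iterator(i = 0:4), in: a[i])', NumDependencies is
// 2, the iterator clause contributes 4 runtime-computed entries, and depobj
// payloads (if any) are copied last, so the emitted array holds 6
// kmp_depend_info records plus the depobj elements.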
4361
4362Address CGOpenMPRuntime::emitDepobjDependClause(
4363 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4364 SourceLocation Loc) {
4365 if (Dependencies.DepExprs.empty())
4366 return Address::invalid();
4367 // Process list of dependencies.
4368 ASTContext &C = CGM.getContext();
4369 Address DependenciesArray = Address::invalid();
4370 unsigned NumDependencies = Dependencies.DepExprs.size();
4371 QualType FlagsTy;
4372 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4373 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4374
4375 llvm::Value *Size;
4376 // Define type kmp_depend_info[<Dependencies.size()>];
4377 // For depobj reserve one extra element to store the number of elements.
4378 // It is required to handle the depobj(x) update(in) construct.
4379 // kmp_depend_info[<Dependencies.size()>] deps;
4380 llvm::Value *NumDepsVal;
4381 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4382 if (const auto *IE =
4383 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4384 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4385 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4386 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4387 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4388 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4389 }
4390 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4391 NumDepsVal);
4392 CharUnits SizeInBytes =
4393 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4394 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4395 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4396 NumDepsVal =
4397 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4398 } else {
4399 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4400 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4401 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4402 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4403 Size = CGM.getSize(Sz.alignTo(Align));
4404 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4405 }
4406 // Need to allocate in dynamic memory.
4407 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4408 // Use default allocator.
4409 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4410 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4411
4412 llvm::Value *Addr =
4413 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4414 CGM.getModule(), OMPRTL___kmpc_alloc),
4415 Args, ".dep.arr.addr");
4416 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4417 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4418 Addr, CGF.Builder.getPtrTy(0));
4419 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4420 // Write number of elements in the first element of array for depobj.
4421 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4422 // deps[i].base_addr = NumDependencies;
4423 LValue BaseAddrLVal = CGF.EmitLValueForField(
4424 Base,
4425 *std::next(KmpDependInfoRD->field_begin(),
4426 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4427 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4428 llvm::PointerUnion<unsigned *, LValue *> Pos;
4429 unsigned Idx = 1;
4430 LValue PosLVal;
4431 if (Dependencies.IteratorExpr) {
4432 PosLVal = CGF.MakeAddrLValue(
4433 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4434 C.getSizeType());
4435 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4436 /*IsInit=*/true);
4437 Pos = &PosLVal;
4438 } else {
4439 Pos = &Idx;
4440 }
4441 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4442 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4443 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4444 CGF.Int8Ty);
4445 return DependenciesArray;
4446}
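// Resulting layout sketch for '#pragma omp depobj(o) depend(in: a, b)':
// deps[0].base_addr holds the element count (2 here), deps[1..2] describe a
// and b, and the address returned above points at deps[1]; getDepobjElements()
// recovers the count by reading the element at index -1.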
4447
4448void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4449 SourceLocation Loc) {
4450 ASTContext &C = CGM.getContext();
4451 QualType FlagsTy;
4452 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4453 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4454 C.VoidPtrTy.castAs<PointerType>());
4455 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4457 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4459 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4460 Addr.getElementType(), Addr.emitRawPointer(CGF),
4461 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4462 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4463 CGF.VoidPtrTy);
4464 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4465 // Use default allocator.
4466 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4467 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4468
4469 // __kmpc_free(gtid, addr, nullptr);
4470 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4471 CGM.getModule(), OMPRTL___kmpc_free),
4472 Args);
4473}
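// For illustration (hypothetical user code): '#pragma omp depobj(d) destroy'
// lands here. The GEP by -1 above steps back over the hidden count element so
// that the pointer originally returned by __kmpc_alloc is what gets handed to
// __kmpc_free.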
4474
4475void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4476 OpenMPDependClauseKind NewDepKind,
4477 SourceLocation Loc) {
4478 ASTContext &C = CGM.getContext();
4479 QualType FlagsTy;
4480 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4481 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4482 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4483 llvm::Value *NumDeps;
4484 LValue Base;
4485 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4486
4487 Address Begin = Base.getAddress();
4488 // Cast from pointer to array type to pointer to single element.
4489 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4490 Begin.emitRawPointer(CGF), NumDeps);
4491 // The basic structure here is a while-do loop.
4492 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4493 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4494 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4495 CGF.EmitBlock(BodyBB);
4496 llvm::PHINode *ElementPHI =
4497 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4498 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4499 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4500 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4501 Base.getTBAAInfo());
4502 // deps[i].flags = NewDepKind;
4503 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4504 LValue FlagsLVal = CGF.EmitLValueForField(
4505 Base, *std::next(KmpDependInfoRD->field_begin(),
4506 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4507   CGF.EmitStoreOfScalar(
4508 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4509 FlagsLVal);
4510
4511 // Shift the address forward by one element.
4512 llvm::Value *ElementNext =
4513 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4514 .emitRawPointer(CGF);
4515 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4516 llvm::Value *IsEmpty =
4517 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4518 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4519 // Done.
4520 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4521}
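// For illustration (hypothetical user code): '#pragma omp depobj(d) update(in)'
// reaches this loop, which walks all NumDeps entries recorded in the depobj
// and overwrites each deps[i].flags with the kmp_depend_info flag value for
// the new dependence kind.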
4522
4523void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4524 const OMPExecutableDirective &D,
4525 llvm::Function *TaskFunction,
4526 QualType SharedsTy, Address Shareds,
4527 const Expr *IfCond,
4528 const OMPTaskDataTy &Data) {
4529 if (!CGF.HaveInsertPoint())
4530 return;
4531
4532   TaskResultTy Result =
4533 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4534 llvm::Value *NewTask = Result.NewTask;
4535 llvm::Function *TaskEntry = Result.TaskEntry;
4536 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4537 LValue TDBase = Result.TDBase;
4538 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4539 // Process list of dependences.
4540 Address DependenciesArray = Address::invalid();
4541 llvm::Value *NumOfElements;
4542 std::tie(NumOfElements, DependenciesArray) =
4543 emitDependClause(CGF, Data.Dependences, Loc);
4544
4545 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4546 // libcall.
4547 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4548 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4549 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4550 // list is not empty
4551 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4552 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4553 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4554 llvm::Value *DepTaskArgs[7];
4555 if (!Data.Dependences.empty()) {
4556 DepTaskArgs[0] = UpLoc;
4557 DepTaskArgs[1] = ThreadID;
4558 DepTaskArgs[2] = NewTask;
4559 DepTaskArgs[3] = NumOfElements;
4560 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4561 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4562 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4563 }
4564 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4565 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4566 if (!Data.Tied) {
4567 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4568 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4569 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4570 }
4571 if (!Data.Dependences.empty()) {
4572 CGF.EmitRuntimeCall(
4573 OMPBuilder.getOrCreateRuntimeFunction(
4574 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4575 DepTaskArgs);
4576 } else {
4577 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4578 CGM.getModule(), OMPRTL___kmpc_omp_task),
4579 TaskArgs);
4580 }
4581 // Check if the parent region is untied and build a return for the untied task.
4582 if (auto *Region =
4583 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4584 Region->emitUntiedSwitch(CGF);
4585 };
4586
4587 llvm::Value *DepWaitTaskArgs[7];
4588 if (!Data.Dependences.empty()) {
4589 DepWaitTaskArgs[0] = UpLoc;
4590 DepWaitTaskArgs[1] = ThreadID;
4591 DepWaitTaskArgs[2] = NumOfElements;
4592 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4593 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4594 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4595 DepWaitTaskArgs[6] =
4596 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4597 }
4598 auto &M = CGM.getModule();
4599 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4600 TaskEntry, &Data, &DepWaitTaskArgs,
4601 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4602 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4603 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4604 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4605 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4606 // is specified.
4607 if (!Data.Dependences.empty())
4608 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4609 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4610 DepWaitTaskArgs);
4611 // Call proxy_task_entry(gtid, new_task);
4612 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4613 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4614 Action.Enter(CGF);
4615 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4616 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4617 OutlinedFnArgs);
4618 };
4619
4620 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4621 // kmp_task_t *new_task);
4622 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4623 // kmp_task_t *new_task);
4624   RegionCodeGenTy RCG(CodeGen);
4625 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4626 M, OMPRTL___kmpc_omp_task_begin_if0),
4627 TaskArgs,
4628 OMPBuilder.getOrCreateRuntimeFunction(
4629 M, OMPRTL___kmpc_omp_task_complete_if0),
4630 TaskArgs);
4631 RCG.setAction(Action);
4632 RCG(CGF);
4633 };
4634
4635 if (IfCond) {
4636 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4637 } else {
4638 RegionCodeGenTy ThenRCG(ThenCodeGen);
4639 ThenRCG(CGF);
4640 }
4641}
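// For illustration (hypothetical user code): for
//   #pragma omp task if(cond) depend(in : x)
// the 'then' branch above emits __kmpc_omp_task_with_deps, while the 'else'
// branch waits on the dependences (__kmpc_omp_taskwait_deps_51) and then runs
// the task body inline between __kmpc_omp_task_begin_if0 and
// __kmpc_omp_task_complete_if0.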
4642
4643void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4644 const OMPLoopDirective &D,
4645 llvm::Function *TaskFunction,
4646 QualType SharedsTy, Address Shareds,
4647 const Expr *IfCond,
4648 const OMPTaskDataTy &Data) {
4649 if (!CGF.HaveInsertPoint())
4650 return;
4651   TaskResultTy Result =
4652 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4653 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4654 // libcall.
4655 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4656 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4657 // sched, kmp_uint64 grainsize, void *task_dup);
4658 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4659 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4660 llvm::Value *IfVal;
4661 if (IfCond) {
4662 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4663 /*isSigned=*/true);
4664 } else {
4665 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4666 }
4667
4668 LValue LBLVal = CGF.EmitLValueForField(
4669 Result.TDBase,
4670 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4671 const auto *LBVar =
4672 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4673 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4674 /*IsInitializer=*/true);
4675 LValue UBLVal = CGF.EmitLValueForField(
4676 Result.TDBase,
4677 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4678 const auto *UBVar =
4679 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4680 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4681 /*IsInitializer=*/true);
4682 LValue StLVal = CGF.EmitLValueForField(
4683 Result.TDBase,
4684 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4685 const auto *StVar =
4686 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4687 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4688 /*IsInitializer=*/true);
4689 // Store reductions address.
4690 LValue RedLVal = CGF.EmitLValueForField(
4691 Result.TDBase,
4692 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4693 if (Data.Reductions) {
4694 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4695 } else {
4696 CGF.EmitNullInitialization(RedLVal.getAddress(),
4697 CGF.getContext().VoidPtrTy);
4698 }
4699 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4700   llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4701 UpLoc,
4702 ThreadID,
4703 Result.NewTask,
4704 IfVal,
4705 LBLVal.getPointer(CGF),
4706 UBLVal.getPointer(CGF),
4707 CGF.EmitLoadOfScalar(StLVal, Loc),
4708 llvm::ConstantInt::getSigned(
4709         CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4710 llvm::ConstantInt::getSigned(
4711 CGF.IntTy, Data.Schedule.getPointer()
4712 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4713 : NoSchedule),
4714 Data.Schedule.getPointer()
4715 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4716 /*isSigned=*/false)
4717 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4718 if (Data.HasModifier)
4719 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4720
4721 TaskArgs.push_back(Result.TaskDupFn
4722                          ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4723 Result.TaskDupFn, CGF.VoidPtrTy)
4724 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4725 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4726 CGM.getModule(), Data.HasModifier
4727 ? OMPRTL___kmpc_taskloop_5
4728 : OMPRTL___kmpc_taskloop),
4729 TaskArgs);
4730}
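// For illustration (hypothetical user code): for
//   #pragma omp taskloop grainsize(4)
// the sched argument above is Grainsize (1) and the trailing kmp_uint64 is 4;
// with num_tasks(n) it would be NumTasks (2) and n; with neither clause,
// NoSchedule (0) and 0.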
4731
4732/// Emit reduction operation for each element of array (required for
4733/// array sections) LHS op = RHS.
4734/// \param Type Type of array.
4735/// \param LHSVar Variable on the left side of the reduction operation
4736/// (references element of array in original variable).
4737/// \param RHSVar Variable on the right side of the reduction operation
4738/// (references element of array in original variable).
4739/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4740/// RHSVar.
4741static void EmitOMPAggregateReduction(
4742 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4743 const VarDecl *RHSVar,
4744 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4745 const Expr *, const Expr *)> &RedOpGen,
4746 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4747 const Expr *UpExpr = nullptr) {
4748   // Perform the element-by-element reduction.
4749 QualType ElementTy;
4750 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4751 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4752
4753 // Drill down to the base element type on both arrays.
4754 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4755 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4756
4757 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4758 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4759 // Cast from pointer to array type to pointer to single element.
4760 llvm::Value *LHSEnd =
4761 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4762 // The basic structure here is a while-do loop.
4763 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4764 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4765 llvm::Value *IsEmpty =
4766 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4767 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4768
4769 // Enter the loop body, making that address the current address.
4770 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4771 CGF.EmitBlock(BodyBB);
4772
4773 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4774
4775 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4776 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4777 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4778 Address RHSElementCurrent(
4779 RHSElementPHI, RHSAddr.getElementType(),
4780 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4781
4782 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4783 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4784 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4785 Address LHSElementCurrent(
4786 LHSElementPHI, LHSAddr.getElementType(),
4787 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4788
4789 // Emit copy.
4790   CodeGenFunction::OMPPrivateScope Scope(CGF);
4791 Scope.addPrivate(LHSVar, LHSElementCurrent);
4792 Scope.addPrivate(RHSVar, RHSElementCurrent);
4793 Scope.Privatize();
4794 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4795 Scope.ForceCleanup();
4796
4797 // Shift the address forward by one element.
4798 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4799 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4800 "omp.arraycpy.dest.element");
4801 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4802 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4803 "omp.arraycpy.src.element");
4804 // Check whether we've reached the end.
4805 llvm::Value *Done =
4806 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4807 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4808 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4809 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4810
4811 // Done.
4812 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4813}
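// For illustration (hypothetical user code): a reduction over an array
// section such as
//   #pragma omp parallel for reduction(+ : a[0:n])
// cannot be combined with a single scalar operation, so the loop above applies
// RedOpGen to each of the n elements in turn.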
4814
4815/// Emit reduction combiner. If the combiner is a simple expression emit it as
4816/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4817/// UDR combiner function.
4818static void emitReductionCombiner(CodeGenFunction &CGF,
4819 const Expr *ReductionOp) {
4820 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4821 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4822 if (const auto *DRE =
4823 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4824 if (const auto *DRD =
4825 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4826 std::pair<llvm::Function *, llvm::Function *> Reduction =
4827               CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4828           RValue Func = RValue::get(Reduction.first);
4829           CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4830 CGF.EmitIgnoredExpr(ReductionOp);
4831 return;
4832 }
4833 CGF.EmitIgnoredExpr(ReductionOp);
4834}
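// For illustration (hypothetical user code): with a user-defined reduction
//   #pragma omp declare reduction(merge : T : omp_out = f(omp_out, omp_in))
// the ReductionOp is a CallExpr whose callee is an OpaqueValueExpr, and the
// OpaqueValueMapping above substitutes the emitted UDR combiner function
// before the call is emitted.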
4835
4836llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4837 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4838     ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4839 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4840 ASTContext &C = CGM.getContext();
4841
4842 // void reduction_func(void *LHSArg, void *RHSArg);
4843 FunctionArgList Args;
4844 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4845                          ImplicitParamKind::Other);
4846 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4847                          ImplicitParamKind::Other);
4848 Args.push_back(&LHSArg);
4849 Args.push_back(&RHSArg);
4850 const auto &CGFI =
4851 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4852 std::string Name = getReductionFuncName(ReducerName);
4853 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4854 llvm::GlobalValue::InternalLinkage, Name,
4855 &CGM.getModule());
4856 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4857 Fn->setDoesNotRecurse();
4858 CodeGenFunction CGF(CGM);
4859 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4860
4861 // Dst = (void*[n])(LHSArg);
4862 // Src = (void*[n])(RHSArg);
4863   Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4864 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4865 CGF.Builder.getPtrTy(0)),
4866 ArgsElemType, CGF.getPointerAlign());
4867   Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4868 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4869 CGF.Builder.getPtrTy(0)),
4870 ArgsElemType, CGF.getPointerAlign());
4871
4872 // ...
4873 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4874 // ...
4875   CodeGenFunction::OMPPrivateScope Scope(CGF);
4876 const auto *IPriv = Privates.begin();
4877 unsigned Idx = 0;
4878 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4879 const auto *RHSVar =
4880 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4881 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4882 const auto *LHSVar =
4883 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4884 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4885 QualType PrivTy = (*IPriv)->getType();
4886 if (PrivTy->isVariablyModifiedType()) {
4887 // Get array size and emit VLA type.
4888 ++Idx;
4889 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4890 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4891 const VariableArrayType *VLA =
4892 CGF.getContext().getAsVariableArrayType(PrivTy);
4893 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4894       CodeGenFunction::OpaqueValueMapping OpaqueMap(
4895 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4896 CGF.EmitVariablyModifiedType(PrivTy);
4897 }
4898 }
4899 Scope.Privatize();
4900 IPriv = Privates.begin();
4901 const auto *ILHS = LHSExprs.begin();
4902 const auto *IRHS = RHSExprs.begin();
4903 for (const Expr *E : ReductionOps) {
4904 if ((*IPriv)->getType()->isArrayType()) {
4905 // Emit reduction for array section.
4906 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4907 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4908       EmitOMPAggregateReduction(
4909 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4910 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4911 emitReductionCombiner(CGF, E);
4912 });
4913 } else {
4914 // Emit reduction for array subscript or single variable.
4915 emitReductionCombiner(CGF, E);
4916 }
4917 ++IPriv;
4918 ++ILHS;
4919 ++IRHS;
4920 }
4921 Scope.ForceCleanup();
4922 CGF.FinishFunction();
4923 return Fn;
4924}
4925
4926void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4927 const Expr *ReductionOp,
4928 const Expr *PrivateRef,
4929 const DeclRefExpr *LHS,
4930 const DeclRefExpr *RHS) {
4931 if (PrivateRef->getType()->isArrayType()) {
4932 // Emit reduction for array section.
4933 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4934 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4935     EmitOMPAggregateReduction(
4936 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4937 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4938 emitReductionCombiner(CGF, ReductionOp);
4939 });
4940 } else {
4941 // Emit reduction for array subscript or single variable.
4942 emitReductionCombiner(CGF, ReductionOp);
4943 }
4944}
4945
4946static std::string generateUniqueName(CodeGenModule &CGM,
4947 llvm::StringRef Prefix, const Expr *Ref);
4948
4949void CGOpenMPRuntime::emitPrivateReduction(
4950 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4951 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4952
4953 // Create a shared global variable (__shared_reduction_var) to accumulate the
4954 // final result.
4955 //
4956 // Call __kmpc_barrier to synchronize threads before initialization.
4957 //
4958 // The master thread (thread_id == 0) initializes __shared_reduction_var
4959 // with the identity value or initializer.
4960 //
4961 // Call __kmpc_barrier to synchronize before combining.
4962 // For each i:
4963 // - Thread enters critical section.
4964 // - Reads its private value from LHSExprs[i].
4965 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4966 // Privates[i]).
4967 // - Exits critical section.
4968 //
4969 // Call __kmpc_barrier after combining.
4970 //
4971 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4972 //
4973 // Final __kmpc_barrier to synchronize after broadcasting
4974 QualType PrivateType = Privates->getType();
4975 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4976
4977 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
4978 std::string ReductionVarNameStr;
4979 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
4980 ReductionVarNameStr =
4981 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4982 else
4983 ReductionVarNameStr = "unnamed_priv_var";
4984
4985 // Create an internal shared variable
4986 std::string SharedName =
4987 CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
4988 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4989 LLVMType, ".omp.reduction." + SharedName);
4990
4991 SharedVar->setAlignment(
4992 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4993
4994 Address SharedResult =
4995 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
4996
4997 llvm::Value *ThreadId = getThreadID(CGF, Loc);
4998 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4999 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
5000
5001 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
5002 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
5003
5004 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
5005 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
5006 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
5007
5008 CGF.EmitBlock(InitBB);
5009
5010 auto EmitSharedInit = [&]() {
5011 if (UDR) { // Check if it's a User-Defined Reduction
5012 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5013 std::pair<llvm::Function *, llvm::Function *> FnPair =
5014             getUserDefinedReduction(UDR);
5015 llvm::Function *InitializerFn = FnPair.second;
5016 if (InitializerFn) {
5017 if (const auto *CE =
5018 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
5019 const auto *OutDRE = cast<DeclRefExpr>(
5020 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5021 ->getSubExpr());
5022 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5023
5024 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5025 LocalScope.addPrivate(OutVD, SharedResult);
5026
5027 (void)LocalScope.Privatize();
5028 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5029 CE->getCallee()->IgnoreParenImpCasts())) {
5030           CodeGenFunction::OpaqueValueMapping OpaqueMap(
5031 CGF, OVE, RValue::get(InitializerFn));
5032 CGF.EmitIgnoredExpr(CE);
5033 } else {
5034 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5035 PrivateType.getQualifiers(),
5036 /*IsInitializer=*/true);
5037 }
5038 } else {
5039 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5040 PrivateType.getQualifiers(),
5041 /*IsInitializer=*/true);
5042 }
5043 } else {
5044 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5045 PrivateType.getQualifiers(),
5046 /*IsInitializer=*/true);
5047 }
5048 } else {
5049 // EmitNullInitialization handles default construction for C++ classes
5050 // and zeroing for scalars, which is a reasonable default.
5051 CGF.EmitNullInitialization(SharedResult, PrivateType);
5052 }
5053 return; // UDR initialization handled
5054 }
5055 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5056 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5057 if (const Expr *InitExpr = VD->getInit()) {
5058 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5059 PrivateType.getQualifiers(), true);
5060 return;
5061 }
5062 }
5063 }
5064 CGF.EmitNullInitialization(SharedResult, PrivateType);
5065 };
5066 EmitSharedInit();
5067 CGF.Builder.CreateBr(InitEndBB);
5068 CGF.EmitBlock(InitEndBB);
5069
5070 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5071 CGM.getModule(), OMPRTL___kmpc_barrier),
5072 BarrierArgs);
5073
5074 const Expr *ReductionOp = ReductionOps;
5075 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5076 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5077 LValue LHSLV = CGF.EmitLValue(Privates);
5078
5079 auto EmitCriticalReduction = [&](auto ReductionGen) {
5080 std::string CriticalName = getName({"reduction_critical"});
5081 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5082 };
5083
5084 if (CurrentUDR) {
5085 // Handle user-defined reduction.
5086 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5087 Action.Enter(CGF);
5088 std::pair<llvm::Function *, llvm::Function *> FnPair =
5089 getUserDefinedReduction(CurrentUDR);
5090 if (FnPair.first) {
5091 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5092 const auto *OutDRE = cast<DeclRefExpr>(
5093 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5094 ->getSubExpr());
5095 const auto *InDRE = cast<DeclRefExpr>(
5096 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5097 ->getSubExpr());
5098 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5099 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5100 SharedLV.getAddress());
5101 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5102 LHSLV.getAddress());
5103 (void)LocalScope.Privatize();
5104 emitReductionCombiner(CGF, ReductionOp);
5105 }
5106 }
5107 };
5108 EmitCriticalReduction(ReductionGen);
5109 } else {
5110 // Handle built-in reduction operations.
5111#ifndef NDEBUG
5112 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5113 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5114 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5115
5116 const Expr *AssignRHS = nullptr;
5117 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5118 if (BinOp->getOpcode() == BO_Assign)
5119 AssignRHS = BinOp->getRHS();
5120 } else if (const auto *OpCall =
5121 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5122 if (OpCall->getOperator() == OO_Equal)
5123 AssignRHS = OpCall->getArg(1);
5124 }
5125
5126 assert(AssignRHS &&
5127 "Private Variable Reduction : Invalid ReductionOp expression");
5128#endif
5129
5130 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5131 Action.Enter(CGF);
5132 const auto *OmpOutDRE =
5133 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5134 const auto *OmpInDRE =
5135 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5136 assert(
5137 OmpOutDRE && OmpInDRE &&
5138 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5139 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5140 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5141 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5142 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5143 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5144 (void)LocalScope.Privatize();
5145 // Emit the actual reduction operation
5146 CGF.EmitIgnoredExpr(ReductionOp);
5147 };
5148 EmitCriticalReduction(ReductionGen);
5149 }
5150
5151 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5152 CGM.getModule(), OMPRTL___kmpc_barrier),
5153 BarrierArgs);
5154
5155 // Broadcast final result
5156 bool IsAggregate = PrivateType->isAggregateType();
5157 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5158 llvm::Value *FinalResultVal = nullptr;
5159 Address FinalResultAddr = Address::invalid();
5160
5161 if (IsAggregate)
5162 FinalResultAddr = SharedResult;
5163 else
5164 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5165
5166 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5167 if (IsAggregate) {
5168 CGF.EmitAggregateCopy(TargetLHSLV,
5169 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5170 PrivateType, AggValueSlot::DoesNotOverlap, false);
5171 } else {
5172 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5173 }
5174 // Final synchronization barrier
5175 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5176 CGM.getModule(), OMPRTL___kmpc_barrier),
5177 BarrierArgs);
5178
5179 // Combiner with original list item
5180 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5181 PrePostActionTy &Action) {
5182 Action.Enter(CGF);
5183 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5184 cast<DeclRefExpr>(LHSExprs),
5185 cast<DeclRefExpr>(RHSExprs));
5186 };
5187 EmitCriticalReduction(OriginalListCombiner);
5188}
5189
5190void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5191 ArrayRef<const Expr *> OrgPrivates,
5192 ArrayRef<const Expr *> OrgLHSExprs,
5193 ArrayRef<const Expr *> OrgRHSExprs,
5194 ArrayRef<const Expr *> OrgReductionOps,
5195 ReductionOptionsTy Options) {
5196 if (!CGF.HaveInsertPoint())
5197 return;
5198
5199 bool WithNowait = Options.WithNowait;
5200 bool SimpleReduction = Options.SimpleReduction;
5201
5202   // The following code should be emitted for the reduction:
5203 //
5204 // static kmp_critical_name lock = { 0 };
5205 //
5206 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5207 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5208 // ...
5209 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5210 // *(Type<n>-1*)rhs[<n>-1]);
5211 // }
5212 //
5213 // ...
5214 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5215 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5216 // RedList, reduce_func, &<lock>)) {
5217 // case 1:
5218 // ...
5219 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5220 // ...
5221 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5222 // break;
5223 // case 2:
5224 // ...
5225 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5226 // ...
5227 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5228 // break;
5229 // default:;
5230 // }
5231 //
5232   // If SimpleReduction is true, only the following code is generated:
5233 // ...
5234 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5235 // ...
5236
5237 ASTContext &C = CGM.getContext();
5238
5239 if (SimpleReduction) {
5240     CodeGenFunction::RunCleanupsScope Scope(CGF);
5241 const auto *IPriv = OrgPrivates.begin();
5242 const auto *ILHS = OrgLHSExprs.begin();
5243 const auto *IRHS = OrgRHSExprs.begin();
5244 for (const Expr *E : OrgReductionOps) {
5245 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5246 cast<DeclRefExpr>(*IRHS));
5247 ++IPriv;
5248 ++ILHS;
5249 ++IRHS;
5250 }
5251 return;
5252 }
5253
5254 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5255 // Only keep entries where the corresponding variable is not private.
5256 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5257 FilteredRHSExprs, FilteredReductionOps;
5258 for (unsigned I : llvm::seq<unsigned>(
5259 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5260 if (!Options.IsPrivateVarReduction[I]) {
5261 FilteredPrivates.emplace_back(OrgPrivates[I]);
5262 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5263 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5264 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5265 }
5266 }
5267 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5268 // processing.
5269 ArrayRef<const Expr *> Privates = FilteredPrivates;
5270 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5271 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5272 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5273
5274 // 1. Build a list of reduction variables.
5275 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5276 auto Size = RHSExprs.size();
5277 for (const Expr *E : Privates) {
5278 if (E->getType()->isVariablyModifiedType())
5279 // Reserve place for array size.
5280 ++Size;
5281 }
5282 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5283 QualType ReductionArrayTy = C.getConstantArrayType(
5284 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5285 /*IndexTypeQuals=*/0);
5286 RawAddress ReductionList =
5287 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5288 const auto *IPriv = Privates.begin();
5289 unsigned Idx = 0;
5290 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5291 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5292 CGF.Builder.CreateStore(
5293         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5294 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5295 Elem);
5296 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5297 // Store array size.
5298 ++Idx;
5299 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5300 llvm::Value *Size = CGF.Builder.CreateIntCast(
5301 CGF.getVLASize(
5302 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5303 .NumElts,
5304 CGF.SizeTy, /*isSigned=*/false);
5305 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5306 Elem);
5307 }
5308 }
5309
5310 // 2. Emit reduce_func().
5311 llvm::Function *ReductionFn = emitReductionFunction(
5312 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5313 Privates, LHSExprs, RHSExprs, ReductionOps);
5314
5315 // 3. Create static kmp_critical_name lock = { 0 };
5316 std::string Name = getName({"reduction"});
5317 llvm::Value *Lock = getCriticalRegionLock(Name);
5318
5319 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5320 // RedList, reduce_func, &<lock>);
5321 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5322 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5323 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5324 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5325 ReductionList.getPointer(), CGF.VoidPtrTy);
5326 llvm::Value *Args[] = {
5327 IdentTLoc, // ident_t *<loc>
5328 ThreadId, // i32 <gtid>
5329 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5330 ReductionArrayTySize, // size_type sizeof(RedList)
5331 RL, // void *RedList
5332 ReductionFn, // void (*) (void *, void *) <reduce_func>
5333 Lock // kmp_critical_name *&<lock>
5334 };
5335 llvm::Value *Res = CGF.EmitRuntimeCall(
5336 OMPBuilder.getOrCreateRuntimeFunction(
5337 CGM.getModule(),
5338 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5339 Args);
5340
5341 // 5. Build switch(res)
5342 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5343 llvm::SwitchInst *SwInst =
5344 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5345
5346 // 6. Build case 1:
5347 // ...
5348 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5349 // ...
5350 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5351 // break;
5352 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5353 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5354 CGF.EmitBlock(Case1BB);
5355
5356 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5357 llvm::Value *EndArgs[] = {
5358 IdentTLoc, // ident_t *<loc>
5359 ThreadId, // i32 <gtid>
5360 Lock // kmp_critical_name *&<lock>
5361 };
5362 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5363 CodeGenFunction &CGF, PrePostActionTy &Action) {
5364     CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5365 const auto *IPriv = Privates.begin();
5366 const auto *ILHS = LHSExprs.begin();
5367 const auto *IRHS = RHSExprs.begin();
5368 for (const Expr *E : ReductionOps) {
5369 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5370 cast<DeclRefExpr>(*IRHS));
5371 ++IPriv;
5372 ++ILHS;
5373 ++IRHS;
5374 }
5375 };
5376   RegionCodeGenTy RCG(CodeGen);
5377 CommonActionTy Action(
5378 nullptr, {},
5379 OMPBuilder.getOrCreateRuntimeFunction(
5380 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5381 : OMPRTL___kmpc_end_reduce),
5382 EndArgs);
5383 RCG.setAction(Action);
5384 RCG(CGF);
5385
5386 CGF.EmitBranch(DefaultBB);
5387
5388 // 7. Build case 2:
5389 // ...
5390 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5391 // ...
5392 // break;
5393 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5394 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5395 CGF.EmitBlock(Case2BB);
5396
5397 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5398 CodeGenFunction &CGF, PrePostActionTy &Action) {
5399 const auto *ILHS = LHSExprs.begin();
5400 const auto *IRHS = RHSExprs.begin();
5401 const auto *IPriv = Privates.begin();
5402 for (const Expr *E : ReductionOps) {
5403 const Expr *XExpr = nullptr;
5404 const Expr *EExpr = nullptr;
5405 const Expr *UpExpr = nullptr;
5406 BinaryOperatorKind BO = BO_Comma;
5407 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5408 if (BO->getOpcode() == BO_Assign) {
5409 XExpr = BO->getLHS();
5410 UpExpr = BO->getRHS();
5411 }
5412 }
5413 // Try to emit update expression as a simple atomic.
5414 const Expr *RHSExpr = UpExpr;
5415 if (RHSExpr) {
5416 // Analyze RHS part of the whole expression.
5417 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5418 RHSExpr->IgnoreParenImpCasts())) {
5419 // If this is a conditional operator, analyze its condition for
5420 // min/max reduction operator.
5421 RHSExpr = ACO->getCond();
5422 }
5423 if (const auto *BORHS =
5424 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5425 EExpr = BORHS->getRHS();
5426 BO = BORHS->getOpcode();
5427 }
5428 }
5429 if (XExpr) {
5430 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5431 auto &&AtomicRedGen = [BO, VD,
5432 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5433 const Expr *EExpr, const Expr *UpExpr) {
5434 LValue X = CGF.EmitLValue(XExpr);
5435 RValue E;
5436 if (EExpr)
5437 E = CGF.EmitAnyExpr(EExpr);
5438 CGF.EmitOMPAtomicSimpleUpdateExpr(
5439 X, E, BO, /*IsXLHSInRHSPart=*/true,
5440 llvm::AtomicOrdering::Monotonic, Loc,
5441 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5442 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5443 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5444 CGF.emitOMPSimpleStore(
5445 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5446 VD->getType().getNonReferenceType(), Loc);
5447 PrivateScope.addPrivate(VD, LHSTemp);
5448 (void)PrivateScope.Privatize();
5449 return CGF.EmitAnyExpr(UpExpr);
5450 });
5451 };
5452 if ((*IPriv)->getType()->isArrayType()) {
5453 // Emit atomic reduction for array section.
5454 const auto *RHSVar =
5455 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5456 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5457 AtomicRedGen, XExpr, EExpr, UpExpr);
5458 } else {
5459 // Emit atomic reduction for array subscript or single variable.
5460 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5461 }
5462 } else {
5463 // Emit as a critical region.
5464 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5465 const Expr *, const Expr *) {
5466 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5467 std::string Name = RT.getName({"atomic_reduction"});
5468         RT.emitCriticalRegion(
5469 CGF, Name,
5470 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5471 Action.Enter(CGF);
5472 emitReductionCombiner(CGF, E);
5473 },
5474 Loc);
5475 };
5476 if ((*IPriv)->getType()->isArrayType()) {
5477 const auto *LHSVar =
5478 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5479 const auto *RHSVar =
5480 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5481 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5482 CritRedGen);
5483 } else {
5484 CritRedGen(CGF, nullptr, nullptr, nullptr);
5485 }
5486 }
5487 ++ILHS;
5488 ++IRHS;
5489 ++IPriv;
5490 }
5491 };
5492 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5493 if (!WithNowait) {
5494 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5495 llvm::Value *EndArgs[] = {
5496 IdentTLoc, // ident_t *<loc>
5497 ThreadId, // i32 <gtid>
5498 Lock // kmp_critical_name *&<lock>
5499 };
5500 CommonActionTy Action(nullptr, {},
5501 OMPBuilder.getOrCreateRuntimeFunction(
5502 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5503 EndArgs);
5504 AtomicRCG.setAction(Action);
5505 AtomicRCG(CGF);
5506 } else {
5507 AtomicRCG(CGF);
5508 }
5509
5510 CGF.EmitBranch(DefaultBB);
5511 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5512 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5513 "PrivateVarReduction: Privates size mismatch");
5514 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5515 "PrivateVarReduction: ReductionOps size mismatch");
5516 for (unsigned I : llvm::seq<unsigned>(
5517 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5518 if (Options.IsPrivateVarReduction[I])
5519 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5520 OrgRHSExprs[I], OrgReductionOps[I]);
5521 }
5522}
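// For illustration (hypothetical user code): a simple
//   #pragma omp parallel for reduction(+ : sum)
// goes through the __kmpc_reduce{_nowait} switch above: case 1 combines the
// private copies under the lock, case 2 combines them with atomics (here a
// single atomic add), and private-variable reductions are handled separately
// by emitPrivateReduction after the switch.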
5523
5524/// Generates a unique name for artificial threadprivate variables.
5525/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5526static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5527 const Expr *Ref) {
5528 SmallString<256> Buffer;
5529 llvm::raw_svector_ostream Out(Buffer);
5530 const clang::DeclRefExpr *DE;
5531 const VarDecl *D = ::getBaseDecl(Ref, DE);
5532 if (!D)
5533 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5534 D = D->getCanonicalDecl();
5535 std::string Name = CGM.getOpenMPRuntime().getName(
5536 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5537 Out << Prefix << Name << "_"
5538       << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5539 return std::string(Out.str());
5540}
5541
5542/// Emits reduction initializer function:
5543/// \code
5544/// void @.red_init(void* %arg, void* %orig) {
5545/// %0 = bitcast void* %arg to <type>*
5546/// store <type> <init>, <type>* %0
5547/// ret void
5548/// }
5549/// \endcode
5550static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5551 SourceLocation Loc,
5552 ReductionCodeGen &RCG, unsigned N) {
5553 ASTContext &C = CGM.getContext();
5554 QualType VoidPtrTy = C.VoidPtrTy;
5555 VoidPtrTy.addRestrict();
5556 FunctionArgList Args;
5557 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5558                           ImplicitParamKind::Other);
5559 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5560                               ImplicitParamKind::Other);
5561 Args.emplace_back(&Param);
5562 Args.emplace_back(&ParamOrig);
5563 const auto &FnInfo =
5564 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5565 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5566 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5567 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5568 Name, &CGM.getModule());
5569 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5570 Fn->setDoesNotRecurse();
5571 CodeGenFunction CGF(CGM);
5572 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5573 QualType PrivateType = RCG.getPrivateType(N);
5574 Address PrivateAddr = CGF.EmitLoadOfPointer(
5575 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5576 C.getPointerType(PrivateType)->castAs<PointerType>());
5577 llvm::Value *Size = nullptr;
5578 // If the size of the reduction item is non-constant, load it from global
5579 // threadprivate variable.
5580 if (RCG.getSizes(N).second) {
5581     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5582 CGF, CGM.getContext().getSizeType(),
5583 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5584 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5585 CGM.getContext().getSizeType(), Loc);
5586 }
5587 RCG.emitAggregateType(CGF, N, Size);
5588 Address OrigAddr = Address::invalid();
5590 // If the initializer uses the initializer from a declare reduction
5591 // construct, emit a pointer to the address of the original reduction item
5592 // (required by the reduction initializer).
5592 if (RCG.usesReductionInitializer(N)) {
5593 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5594 OrigAddr = CGF.EmitLoadOfPointer(
5595 SharedAddr,
5596 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5597 }
5598 // Emit the initializer:
5599 // %0 = bitcast void* %arg to <type>*
5600 // store <type> <init>, <type>* %0
5601 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5602 [](CodeGenFunction &) { return false; });
5603 CGF.FinishFunction();
5604 return Fn;
5605}
5606
5607/// Emits reduction combiner function:
5608/// \code
5609/// void @.red_comb(void* %arg0, void* %arg1) {
5610/// %lhs = bitcast void* %arg0 to <type>*
5611/// %rhs = bitcast void* %arg1 to <type>*
5612/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5613/// store <type> %2, <type>* %lhs
5614/// ret void
5615/// }
5616/// \endcode
5617static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5618 SourceLocation Loc,
5619 ReductionCodeGen &RCG, unsigned N,
5620 const Expr *ReductionOp,
5621 const Expr *LHS, const Expr *RHS,
5622 const Expr *PrivateRef) {
5623 ASTContext &C = CGM.getContext();
5624 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5625 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5626 FunctionArgList Args;
5627 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5628 C.VoidPtrTy, ImplicitParamKind::Other);
5629 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5630                             ImplicitParamKind::Other);
5631 Args.emplace_back(&ParamInOut);
5632 Args.emplace_back(&ParamIn);
5633 const auto &FnInfo =
5634 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5635 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5636 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5637 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5638 Name, &CGM.getModule());
5639 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5640 Fn->setDoesNotRecurse();
5641 CodeGenFunction CGF(CGM);
5642 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5643 llvm::Value *Size = nullptr;
5644 // If the size of the reduction item is non-constant, load it from global
5645 // threadprivate variable.
5646 if (RCG.getSizes(N).second) {
5647     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5648 CGF, CGM.getContext().getSizeType(),
5649 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5650 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5651 CGM.getContext().getSizeType(), Loc);
5652 }
5653 RCG.emitAggregateType(CGF, N, Size);
5654 // Remap lhs and rhs variables to the addresses of the function arguments.
5655 // %lhs = bitcast void* %arg0 to <type>*
5656 // %rhs = bitcast void* %arg1 to <type>*
5657 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5658 PrivateScope.addPrivate(
5659 LHSVD,
5660 // Pull out the pointer to the variable.
5661       CGF.EmitLoadOfPointer(
5662 CGF.GetAddrOfLocalVar(&ParamInOut)
5663 .withElementType(CGF.Builder.getPtrTy(0)),
5664 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5665 PrivateScope.addPrivate(
5666 RHSVD,
5667 // Pull out the pointer to the variable.
5668       CGF.EmitLoadOfPointer(
5669 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5670 CGF.Builder.getPtrTy(0)),
5671 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5672 PrivateScope.Privatize();
5673 // Emit the combiner body:
5674 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5675 // store <type> %2, <type>* %lhs
5676   CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5677 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5678 cast<DeclRefExpr>(RHS));
5679 CGF.FinishFunction();
5680 return Fn;
5681}
5682
5683/// Emits reduction finalizer function:
5684/// \code
5685/// void @.red_fini(void* %arg) {
5686/// %0 = bitcast void* %arg to <type>*
5687/// <destroy>(<type>* %0)
5688/// ret void
5689/// }
5690/// \endcode
5691static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5692 SourceLocation Loc,
5693 ReductionCodeGen &RCG, unsigned N) {
5694 if (!RCG.needCleanups(N))
5695 return nullptr;
5696 ASTContext &C = CGM.getContext();
5697 FunctionArgList Args;
5698 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5699                           ImplicitParamKind::Other);
5700 Args.emplace_back(&Param);
5701 const auto &FnInfo =
5702 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5703 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5704 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5705 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5706 Name, &CGM.getModule());
5707 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5708 Fn->setDoesNotRecurse();
5709 CodeGenFunction CGF(CGM);
5710 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5711 Address PrivateAddr = CGF.EmitLoadOfPointer(
5712 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5713 llvm::Value *Size = nullptr;
5714 // If the size of the reduction item is non-constant, load it from global
5715 // threadprivate variable.
5716 if (RCG.getSizes(N).second) {
5717     Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5718 CGF, CGM.getContext().getSizeType(),
5719 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5720 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5721 CGM.getContext().getSizeType(), Loc);
5722 }
5723 RCG.emitAggregateType(CGF, N, Size);
5724 // Emit the finalizer body:
5725 // <destroy>(<type>* %0)
5726 RCG.emitCleanups(CGF, N, PrivateAddr);
5727 CGF.FinishFunction(Loc);
5728 return Fn;
5729}
5730
5731llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5732     CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5733 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5734 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5735 return nullptr;
5736
5737 // Build typedef struct:
5738 // kmp_taskred_input {
5739 // void *reduce_shar; // shared reduction item
5740 // void *reduce_orig; // original reduction item used for initialization
5741 // size_t reduce_size; // size of data item
5742 // void *reduce_init; // data initialization routine
5743 // void *reduce_fini; // data finalization routine
5744 // void *reduce_comb; // data combiner routine
5745 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5746 // } kmp_taskred_input_t;
5747 ASTContext &C = CGM.getContext();
5748 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5749 RD->startDefinition();
5750 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5751 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5752 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5753 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5754 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5755 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5756 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5757 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5758 RD->completeDefinition();
5759 CanQualType RDType = C.getCanonicalTagType(RD);
5760 unsigned Size = Data.ReductionVars.size();
5761 llvm::APInt ArraySize(/*numBits=*/64, Size);
5762 QualType ArrayRDType =
5763 C.getConstantArrayType(RDType, ArraySize, nullptr,
5764 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5765 // kmp_task_red_input_t .rd_input.[Size];
5766 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5767 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5768 Data.ReductionCopies, Data.ReductionOps);
5769 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5770 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5771 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5772 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5773 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5774 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5775 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5776 ".rd_input.gep.");
5777 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5778 // ElemLVal.reduce_shar = &Shareds[Cnt];
5779 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5780 RCG.emitSharedOrigLValue(CGF, Cnt);
5781 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5782 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5783 // ElemLVal.reduce_orig = &Origs[Cnt];
5784 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5785 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5786 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5787 RCG.emitAggregateType(CGF, Cnt);
5788 llvm::Value *SizeValInChars;
5789 llvm::Value *SizeVal;
5790 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5791 // We use delayed creation/initialization for VLAs and array sections. It is
5792 // required because the runtime does not provide a way to pass the sizes of
5793 // VLAs/array sections to the initializer/combiner/finalizer functions.
5794 // Instead, threadprivate global variables are used to store these values and
5795 // make them available to those functions.
5796 bool DelayedCreation = !!SizeVal;
5797 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5798 /*isSigned=*/false);
5799 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5800 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5801 // ElemLVal.reduce_init = init;
5802 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5803 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5804 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5805 // ElemLVal.reduce_fini = fini;
5806 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5807 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5808 llvm::Value *FiniAddr =
5809 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5810 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5811 // ElemLVal.reduce_comb = comb;
5812 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5813 llvm::Value *CombAddr = emitReduceCombFunction(
5814 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5815 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5816 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5817 // ElemLVal.flags = 0;
5818 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5819 if (DelayedCreation) {
5820     CGF.EmitStoreOfScalar(
5821 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5822 FlagsLVal);
5823 } else
5824 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5825 }
5826 if (Data.IsReductionWithTaskMod) {
5827 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5828 // is_ws, int num, void *data);
5829 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5830 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5831 CGM.IntTy, /*isSigned=*/true);
5832 llvm::Value *Args[] = {
5833 IdentTLoc, GTid,
5834 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5835 /*isSigned=*/true),
5836 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5837         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5838 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5839 return CGF.EmitRuntimeCall(
5840 OMPBuilder.getOrCreateRuntimeFunction(
5841 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5842 Args);
5843 }
5844 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5845 llvm::Value *Args[] = {
5846 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5847 /*isSigned=*/true),
5848 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5849       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5850 CGM.VoidPtrTy)};
5851 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5852 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5853 Args);
5854}
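// For illustration (hypothetical user code): for
//   #pragma omp taskgroup task_reduction(+ : x)
// this builds a one-element kmp_taskred_input_t array describing x (size,
// init, fini, comb) and passes it to __kmpc_taskred_init, or to
// __kmpc_taskred_modifier_init when a reduction modifier is present.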
5855
5856void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5857 SourceLocation Loc,
5858 bool IsWorksharingReduction) {
5859 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5860 // is_ws, int num, void *data);
5861 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5862 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5863 CGM.IntTy, /*isSigned=*/true);
5864 llvm::Value *Args[] = {IdentTLoc, GTid,
5865 llvm::ConstantInt::get(CGM.IntTy,
5866 IsWorksharingReduction ? 1 : 0,
5867 /*isSigned=*/true)};
5868 (void)CGF.EmitRuntimeCall(
5869 OMPBuilder.getOrCreateRuntimeFunction(
5870 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5871 Args);
5872}
5873
5874void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5875 SourceLocation Loc,
5876 ReductionCodeGen &RCG,
5877 unsigned N) {
5878 auto Sizes = RCG.getSizes(N);
5879   // Emit a threadprivate global variable if the size is non-constant
5880   // (Sizes.second != nullptr).
5881 if (Sizes.second) {
5882 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5883 /*isSigned=*/false);
5884     Address SizeAddr = getAddrOfArtificialThreadPrivate(
5885 CGF, CGM.getContext().getSizeType(),
5886 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5887 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5888 }
5889}
5890
5891Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5892 SourceLocation Loc,
5893 llvm::Value *ReductionsPtr,
5894 LValue SharedLVal) {
5895 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5896 // *d);
5897 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5898 CGM.IntTy,
5899 /*isSigned=*/true),
5900 ReductionsPtr,
5901                          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5902 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5903 return Address(
5904 CGF.EmitRuntimeCall(
5905 OMPBuilder.getOrCreateRuntimeFunction(
5906 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5907 Args),
5908 CGF.Int8Ty, SharedLVal.getAlignment());
5909}
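// Illustrative usage: a task with 'in_reduction(+ : x)' locates its
// thread-specific copy of 'x' via
//   __kmpc_task_reduction_get_th_data(gtid, tg, &x)
// where 'tg' is the taskgroup handle produced by the *_init calls above.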
5910
5911void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5912 const OMPTaskDataTy &Data) {
5913 if (!CGF.HaveInsertPoint())
5914 return;
5915
5916 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5917 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5918 OMPBuilder.createTaskwait(CGF.Builder);
5919 } else {
5920 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5921 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5922 auto &M = CGM.getModule();
5923 Address DependenciesArray = Address::invalid();
5924 llvm::Value *NumOfElements;
5925 std::tie(NumOfElements, DependenciesArray) =
5926 emitDependClause(CGF, Data.Dependences, Loc);
5927 if (!Data.Dependences.empty()) {
5928 llvm::Value *DepWaitTaskArgs[7];
5929 DepWaitTaskArgs[0] = UpLoc;
5930 DepWaitTaskArgs[1] = ThreadID;
5931 DepWaitTaskArgs[2] = NumOfElements;
5932 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5933 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5934 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5935 DepWaitTaskArgs[6] =
5936 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5937
5938 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5939
5940 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5941 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5942 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5943 // kmp_int32 has_no_wait); if dependence info is specified.
5944 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5945 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5946 DepWaitTaskArgs);
5947
5948 } else {
5949
5950 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5951 // global_tid);
5952 llvm::Value *Args[] = {UpLoc, ThreadID};
5953 // Ignore return result until untied tasks are supported.
5954 CGF.EmitRuntimeCall(
5955 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5956 Args);
5957 }
5958 }
5959
5960 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5961 Region->emitUntiedSwitch(CGF);
5962}
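// Example (illustrative): '#pragma omp taskwait depend(in : x)' takes the
// dependence branch above and calls __kmpc_omp_taskwait_deps_51, while a
// plain '#pragma omp taskwait' either goes through the OpenMPIRBuilder or
// calls __kmpc_omp_taskwait(loc, gtid).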
5963
5964void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5965 OpenMPDirectiveKind InnerKind,
5966 const RegionCodeGenTy &CodeGen,
5967 bool HasCancel) {
5968 if (!CGF.HaveInsertPoint())
5969 return;
5970 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5971 InnerKind != OMPD_critical &&
5972 InnerKind != OMPD_master &&
5973 InnerKind != OMPD_masked);
5974 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5975}
5976
5977namespace {
5978enum RTCancelKind {
5979 CancelNoreq = 0,
5980 CancelParallel = 1,
5981 CancelLoop = 2,
5982 CancelSections = 3,
5983 CancelTaskgroup = 4
5984};
5985} // anonymous namespace
5986
5987static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5988 RTCancelKind CancelKind = CancelNoreq;
5989 if (CancelRegion == OMPD_parallel)
5990 CancelKind = CancelParallel;
5991 else if (CancelRegion == OMPD_for)
5992 CancelKind = CancelLoop;
5993 else if (CancelRegion == OMPD_sections)
5994 CancelKind = CancelSections;
5995 else {
5996 assert(CancelRegion == OMPD_taskgroup);
5997 CancelKind = CancelTaskgroup;
5998 }
5999 return CancelKind;
6000}
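// For example, '#pragma omp cancel for' maps to CancelLoop (2) and
// '#pragma omp cancel taskgroup' maps to CancelTaskgroup (4).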
6001
6002void CGOpenMPRuntime::emitCancellationPointCall(
6003 CodeGenFunction &CGF, SourceLocation Loc,
6004 OpenMPDirectiveKind CancelRegion) {
6005 if (!CGF.HaveInsertPoint())
6006 return;
6007 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6008 // global_tid, kmp_int32 cncl_kind);
6009 if (auto *OMPRegionInfo =
6010 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6011 // For 'cancellation point taskgroup', the task region info may not have a
6012 // cancel. This may instead happen in another adjacent task.
6013 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6014 llvm::Value *Args[] = {
6015 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6016 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6017 // Ignore return result until untied tasks are supported.
6018 llvm::Value *Result = CGF.EmitRuntimeCall(
6019 OMPBuilder.getOrCreateRuntimeFunction(
6020 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6021 Args);
6022 // if (__kmpc_cancellationpoint()) {
6023 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6024 // exit from construct;
6025 // }
6026 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6027 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6028 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6029 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6030 CGF.EmitBlock(ExitBB);
6031 if (CancelRegion == OMPD_parallel)
6032 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6033 // exit from construct;
6034 CodeGenFunction::JumpDest CancelDest =
6035 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6036 CGF.EmitBranchThroughCleanup(CancelDest);
6037 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6038 }
6039 }
6040}
6041
6042void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6043 const Expr *IfCond,
6044 OpenMPDirectiveKind CancelRegion) {
6045 if (!CGF.HaveInsertPoint())
6046 return;
6047 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6048 // kmp_int32 cncl_kind);
6049 auto &M = CGM.getModule();
6050 if (auto *OMPRegionInfo =
6051 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6052 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6053 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6054 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6055 llvm::Value *Args[] = {
6056 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6057 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6058 // Ignore return result until untied tasks are supported.
6059 llvm::Value *Result = CGF.EmitRuntimeCall(
6060 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6061 // if (__kmpc_cancel()) {
6062 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6063 // exit from construct;
6064 // }
6065 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6066 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6067 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6068 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6069 CGF.EmitBlock(ExitBB);
6070 if (CancelRegion == OMPD_parallel)
6071 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6072 // exit from construct;
6073 CodeGenFunction::JumpDest CancelDest =
6074 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6075 CGF.EmitBranchThroughCleanup(CancelDest);
6076 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6077 };
6078 if (IfCond) {
6079 emitIfClause(CGF, IfCond, ThenGen,
6080 [](CodeGenFunction &, PrePostActionTy &) {});
6081 } else {
6082 RegionCodeGenTy ThenRCG(ThenGen);
6083 ThenRCG(CGF);
6084 }
6085 }
6086}
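// Sketch of the control flow emitted above (illustrative IR, names abridged):
//   %res = call i32 @__kmpc_cancel(ptr @loc, i32 %gtid, i32 %kind)
//   %cmp = icmp ne i32 %res, 0
//   br i1 %cmp, label %.cancel.exit, label %.cancel.continue
// with a cancellation barrier emitted in %.cancel.exit for 'parallel'.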
6087
6088namespace {
6089/// Cleanup action for uses_allocators support.
6090class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6091 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6092
6093public:
6094 OMPUsesAllocatorsActionTy(
6095 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6096 : Allocators(Allocators) {}
6097 void Enter(CodeGenFunction &CGF) override {
6098 if (!CGF.HaveInsertPoint())
6099 return;
6100 for (const auto &AllocatorData : Allocators) {
6101 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6102 CGF, AllocatorData.first, AllocatorData.second);
6103 }
6104 }
6105 void Exit(CodeGenFunction &CGF) override {
6106 if (!CGF.HaveInsertPoint())
6107 return;
6108 for (const auto &AllocatorData : Allocators) {
6109 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6110 AllocatorData.first);
6111 }
6112 }
6113};
6114} // namespace
6115
6116void CGOpenMPRuntime::emitTargetOutlinedFunction(
6117 const OMPExecutableDirective &D, StringRef ParentName,
6118 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6119 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6120 assert(!ParentName.empty() && "Invalid target entry parent name!");
6121 HasEmittedTargetRegion = true;
6122 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6123 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6124 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6125 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6126 if (!D.AllocatorTraits)
6127 continue;
6128 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6129 }
6130 }
6131 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6132 CodeGen.setAction(UsesAllocatorAction);
6133 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6134 IsOffloadEntry, CodeGen);
6135}
6136
6137void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6138 const Expr *Allocator,
6139 const Expr *AllocatorTraits) {
6140 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6141 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6142 // Use default memspace handle.
6143 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6144 llvm::Value *NumTraits = llvm::ConstantInt::get(
6145 CGF.IntTy, cast<ConstantArrayType>(
6146 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6147 ->getSize()
6148 .getLimitedValue());
6149 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6150 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6151 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6152 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6153 AllocatorTraitsLVal.getBaseInfo(),
6154 AllocatorTraitsLVal.getTBAAInfo());
6155 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6156
6157 llvm::Value *AllocatorVal =
6158 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6159 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6160 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6161 // Store to allocator.
6162 CGF.EmitVarDecl(*cast<VarDecl>(
6163 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6164 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6165 AllocatorVal =
6166 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6167 Allocator->getType(), Allocator->getExprLoc());
6168 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6169}
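// Example (illustrative; 'my_alloc' and 'my_traits' are made-up names):
//   omp_alloctrait_t my_traits[] = {{omp_atk_alignment, 64}};
//   #pragma omp target uses_allocators(my_alloc(my_traits))
// emits, on entry to the region, roughly
//   my_alloc = __kmpc_init_allocator(gtid, /*memspace=*/null, 1, &my_traits)
// with the matching __kmpc_destroy_allocator call emitted on exit below.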
6170
6171void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6172 const Expr *Allocator) {
6173 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6174 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6175 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6176 llvm::Value *AllocatorVal =
6177 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6178 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6179 CGF.getContext().VoidPtrTy,
6180 Allocator->getExprLoc());
6181 (void)CGF.EmitRuntimeCall(
6182 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6183 OMPRTL___kmpc_destroy_allocator),
6184 {ThreadId, AllocatorVal});
6185}
6186
6187void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6188 const OMPExecutableDirective &D, CodeGenFunction &CGF,
6189 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6190 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6191 "invalid default attrs structure");
6192 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6193 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6194
6195 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6196 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6197 /*UpperBoundOnly=*/true);
6198
6199 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6200 for (auto *A : C->getAttrs()) {
6201 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6202 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6203 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6204 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6205 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6206 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6207 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6208 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6209 &AttrMaxThreadsVal);
6210 else
6211 continue;
6212
6213 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6214 if (AttrMaxThreadsVal > 0)
6215 MaxThreadsVal = MaxThreadsVal > 0
6216 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6217 : AttrMaxThreadsVal;
6218 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6219 if (AttrMaxBlocksVal > 0)
6220 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6221 : AttrMaxBlocksVal;
6222 }
6223 }
6224}
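// Worked example (illustrative): with
//   #pragma omp target ompx_attribute(__attribute__((launch_bounds(128, 2))))
// the CUDA launch-bounds handling above clamps MaxThreads to at most 128 and
// raises MinTeams to at least 2.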
6225
6226void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6227 const OMPExecutableDirective &D, StringRef ParentName,
6228 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6229 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6230
6231 llvm::TargetRegionEntryInfo EntryInfo =
6232 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6233
6234 CodeGenFunction CGF(CGM, true);
6235 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6236 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6237 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6238
6239 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6240 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6241 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6242 };
6243
6244 cantFail(OMPBuilder.emitTargetRegionFunction(
6245 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6246 OutlinedFnID));
6247
6248 if (!OutlinedFn)
6249 return;
6250
6251 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6252
6253 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6254 for (auto *A : C->getAttrs()) {
6255 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6256 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6257 }
6258 }
6259}
6260
6261/// Checks if the expression is constant or does not have non-trivial function
6262/// calls.
6263static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6264 // We can skip constant expressions.
6265 // We can skip expressions with trivial calls or simple expressions.
6266 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6267 !E->hasNonTrivialCall(Ctx)) &&
6268 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6269}
6270
6271const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6272 const Stmt *Body) {
6273 const Stmt *Child = Body->IgnoreContainers();
6274 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6275 Child = nullptr;
6276 for (const Stmt *S : C->body()) {
6277 if (const auto *E = dyn_cast<Expr>(S)) {
6278 if (isTrivial(Ctx, E))
6279 continue;
6280 }
6281 // Some of the statements can be ignored.
6282 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6283 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6284 continue;
6285 // Analyze declarations.
6286 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6287 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6288 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6289 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6290 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6291 isa<UsingDirectiveDecl>(D) ||
6292 isa<OMPDeclareReductionDecl>(D) ||
6293 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6294 return true;
6295 const auto *VD = dyn_cast<VarDecl>(D);
6296 if (!VD)
6297 return false;
6298 return VD->hasGlobalStorage() || !VD->isUsed();
6299 }))
6300 continue;
6301 }
6302 // Found multiple children - cannot get the one child only.
6303 if (Child)
6304 return nullptr;
6305 Child = S;
6306 }
6307 if (Child)
6308 Child = Child->IgnoreContainers();
6309 }
6310 return Child;
6311}
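// Example (illustrative): for a captured body such as
//   { int unused; #pragma omp teams ... }
// the teams directive is returned as the single child, because the unused
// local declaration is skipped by the rules above.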
6312
6313const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6314 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6315 int32_t &MaxTeamsVal) {
6316
6317 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6318 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6319 "Expected target-based executable directive.");
6320 switch (DirectiveKind) {
6321 case OMPD_target: {
6322 const auto *CS = D.getInnermostCapturedStmt();
6323 const auto *Body =
6324 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6325 const Stmt *ChildStmt =
6326 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6327 if (const auto *NestedDir =
6328 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6329 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6330 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6331 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6332 ->getNumTeams()
6333 .front();
6334 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6335 if (auto Constant =
6336 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6337 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6338 return NumTeams;
6339 }
6340 MinTeamsVal = MaxTeamsVal = 0;
6341 return nullptr;
6342 }
6343 MinTeamsVal = MaxTeamsVal = 1;
6344 return nullptr;
6345 }
6346 // A value of -1 signals that no teams region needs to be emitted.
6347 MinTeamsVal = MaxTeamsVal = -1;
6348 return nullptr;
6349 }
6350 case OMPD_target_teams_loop:
6351 case OMPD_target_teams:
6352 case OMPD_target_teams_distribute:
6353 case OMPD_target_teams_distribute_simd:
6354 case OMPD_target_teams_distribute_parallel_for:
6355 case OMPD_target_teams_distribute_parallel_for_simd: {
6356 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6357 const Expr *NumTeams =
6358 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6359 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6360 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6361 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6362 return NumTeams;
6363 }
6364 MinTeamsVal = MaxTeamsVal = 0;
6365 return nullptr;
6366 }
6367 case OMPD_target_parallel:
6368 case OMPD_target_parallel_for:
6369 case OMPD_target_parallel_for_simd:
6370 case OMPD_target_parallel_loop:
6371 case OMPD_target_simd:
6372 MinTeamsVal = MaxTeamsVal = 1;
6373 return nullptr;
6374 case OMPD_parallel:
6375 case OMPD_for:
6376 case OMPD_parallel_for:
6377 case OMPD_parallel_loop:
6378 case OMPD_parallel_master:
6379 case OMPD_parallel_sections:
6380 case OMPD_for_simd:
6381 case OMPD_parallel_for_simd:
6382 case OMPD_cancel:
6383 case OMPD_cancellation_point:
6384 case OMPD_ordered:
6385 case OMPD_threadprivate:
6386 case OMPD_allocate:
6387 case OMPD_task:
6388 case OMPD_simd:
6389 case OMPD_tile:
6390 case OMPD_unroll:
6391 case OMPD_sections:
6392 case OMPD_section:
6393 case OMPD_single:
6394 case OMPD_master:
6395 case OMPD_critical:
6396 case OMPD_taskyield:
6397 case OMPD_barrier:
6398 case OMPD_taskwait:
6399 case OMPD_taskgroup:
6400 case OMPD_atomic:
6401 case OMPD_flush:
6402 case OMPD_depobj:
6403 case OMPD_scan:
6404 case OMPD_teams:
6405 case OMPD_target_data:
6406 case OMPD_target_exit_data:
6407 case OMPD_target_enter_data:
6408 case OMPD_distribute:
6409 case OMPD_distribute_simd:
6410 case OMPD_distribute_parallel_for:
6411 case OMPD_distribute_parallel_for_simd:
6412 case OMPD_teams_distribute:
6413 case OMPD_teams_distribute_simd:
6414 case OMPD_teams_distribute_parallel_for:
6415 case OMPD_teams_distribute_parallel_for_simd:
6416 case OMPD_target_update:
6417 case OMPD_declare_simd:
6418 case OMPD_declare_variant:
6419 case OMPD_begin_declare_variant:
6420 case OMPD_end_declare_variant:
6421 case OMPD_declare_target:
6422 case OMPD_end_declare_target:
6423 case OMPD_declare_reduction:
6424 case OMPD_declare_mapper:
6425 case OMPD_taskloop:
6426 case OMPD_taskloop_simd:
6427 case OMPD_master_taskloop:
6428 case OMPD_master_taskloop_simd:
6429 case OMPD_parallel_master_taskloop:
6430 case OMPD_parallel_master_taskloop_simd:
6431 case OMPD_requires:
6432 case OMPD_metadirective:
6433 case OMPD_unknown:
6434 break;
6435 default:
6436 break;
6437 }
6438 llvm_unreachable("Unexpected directive kind.");
6439}
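// Examples (derived from the cases above): 'target teams num_teams(4)' yields
// MinTeamsVal = MaxTeamsVal = 4; 'target teams' without the clause yields 0/0
// (runtime default); 'target parallel' and the other non-teams forms yield
// 1/1.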
6440
6441llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6442 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6443 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6444 "Clauses associated with the teams directive expected to be emitted "
6445 "only for the host!");
6446 CGBuilderTy &Bld = CGF.Builder;
6447 int32_t MinNT = -1, MaxNT = -1;
6448 const Expr *NumTeams =
6449 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6450 if (NumTeams != nullptr) {
6451 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6452
6453 switch (DirectiveKind) {
6454 case OMPD_target: {
6455 const auto *CS = D.getInnermostCapturedStmt();
6456 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6457 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6458 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6459 /*IgnoreResultAssign*/ true);
6460 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6461 /*isSigned=*/true);
6462 }
6463 case OMPD_target_teams:
6464 case OMPD_target_teams_distribute:
6465 case OMPD_target_teams_distribute_simd:
6466 case OMPD_target_teams_distribute_parallel_for:
6467 case OMPD_target_teams_distribute_parallel_for_simd: {
6468 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6469 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6470 /*IgnoreResultAssign*/ true);
6471 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6472 /*isSigned=*/true);
6473 }
6474 default:
6475 break;
6476 }
6477 }
6478
6479 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6480 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6481}
6482
6483/// Check for a num threads constant value (stored in \p UpperBound), or
6484/// expression (stored in \p E). If the value is conditional (via an if-clause),
6485/// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
6486/// corresponding expression evaluation is not performed.
6487static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6488 const Expr **E, int32_t &UpperBound,
6489 bool UpperBoundOnly, llvm::Value **CondVal) {
6490 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6491 CGF.getContext(), CS->getCapturedStmt());
6492 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6493 if (!Dir)
6494 return;
6495
6496 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6497 // Handle the if clause. If it is present, the number of threads is
6498 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6499 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6500 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6501 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6502 const OMPIfClause *IfClause = nullptr;
6503 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6504 if (C->getNameModifier() == OMPD_unknown ||
6505 C->getNameModifier() == OMPD_parallel) {
6506 IfClause = C;
6507 break;
6508 }
6509 }
6510 if (IfClause) {
6511 const Expr *CondExpr = IfClause->getCondition();
6512 bool Result;
6513 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6514 if (!Result) {
6515 UpperBound = 1;
6516 return;
6517 }
6518 } else {
6519 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6520 if (const auto *PreInit =
6521 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6522 for (const auto *I : PreInit->decls()) {
6523 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6524 CGF.EmitVarDecl(cast<VarDecl>(*I));
6525 } else {
6526 CodeGenFunction::AutoVarEmission Emission =
6527 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6528 CGF.EmitAutoVarCleanups(Emission);
6529 }
6530 }
6531 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6532 }
6533 }
6534 }
6535 }
6536 // Check the value of the num_threads clause only if the if clause was not
6537 // specified or does not evaluate to false.
6538 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6539 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6540 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6541 const auto *NumThreadsClause =
6542 Dir->getSingleClause<OMPNumThreadsClause>();
6543 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6544 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6545 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6546 UpperBound =
6547 UpperBound
6548 ? Constant->getZExtValue()
6549 : std::min(UpperBound,
6550 static_cast<int32_t>(Constant->getZExtValue()));
6551 // If we haven't found an upper bound, remember we saw a thread limiting
6552 // clause.
6553 if (UpperBound == -1)
6554 UpperBound = 0;
6555 if (!E)
6556 return;
6557 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6558 if (const auto *PreInit =
6559 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6560 for (const auto *I : PreInit->decls()) {
6561 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6562 CGF.EmitVarDecl(cast<VarDecl>(*I));
6563 } else {
6564 CodeGenFunction::AutoVarEmission Emission =
6565 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6566 CGF.EmitAutoVarCleanups(Emission);
6567 }
6568 }
6569 }
6570 *E = NTExpr;
6571 }
6572 return;
6573 }
6574 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6575 UpperBound = 1;
6576}
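// Example (illustrative): for a nested
//   #pragma omp parallel if(c) num_threads(8)
// the code above stores the emitted value of 'c' into *CondVal and records 8
// as the constant upper bound, per the
// '<cond> ? (<numthreads> ? <numthreads> : 0) : 1' rule.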
6577
6578const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6579 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6580 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6581 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6582 "Clauses associated with the teams directive expected to be emitted "
6583 "only for the host!");
6584 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6585 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6586 "Expected target-based executable directive.");
6587
6588 const Expr *NT = nullptr;
6589 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6590
6591 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6592 if (E->isIntegerConstantExpr(CGF.getContext())) {
6593 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6594 UpperBound = UpperBound ? Constant->getZExtValue()
6595 : std::min(UpperBound,
6596 int32_t(Constant->getZExtValue()));
6597 }
6598 // If we haven't found an upper bound, remember we saw a thread limiting
6599 // clause.
6600 if (UpperBound == -1)
6601 UpperBound = 0;
6602 if (EPtr)
6603 *EPtr = E;
6604 };
6605
6606 auto ReturnSequential = [&]() {
6607 UpperBound = 1;
6608 return NT;
6609 };
6610
6611 switch (DirectiveKind) {
6612 case OMPD_target: {
6613 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6614 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6615 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6616 CGF.getContext(), CS->getCapturedStmt());
6617 // TODO: The standard is not clear on how to resolve two thread limit
6618 // clauses; let's pick the teams one if it's present, otherwise the target one.
6619 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6620 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6621 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6622 ThreadLimitClause = TLC;
6623 if (ThreadLimitExpr) {
6624 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6625 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6626 CodeGenFunction::LexicalScope Scope(
6627 CGF,
6628 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6629 if (const auto *PreInit =
6630 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6631 for (const auto *I : PreInit->decls()) {
6632 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6633 CGF.EmitVarDecl(cast<VarDecl>(*I));
6634 } else {
6635 CodeGenFunction::AutoVarEmission Emission =
6636 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6637 CGF.EmitAutoVarCleanups(Emission);
6638 }
6639 }
6640 }
6641 }
6642 }
6643 }
6644 if (ThreadLimitClause)
6645 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6646 ThreadLimitExpr);
6647 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6648 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6649 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6650 CS = Dir->getInnermostCapturedStmt();
6651 Child = CGOpenMPRuntime::getSingleCompoundChild(
6652 CGF.getContext(), CS->getCapturedStmt());
6653 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6654 }
6655 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6656 CS = Dir->getInnermostCapturedStmt();
6657 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6658 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6659 return ReturnSequential();
6660 }
6661 return NT;
6662 }
6663 case OMPD_target_teams: {
6664 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6665 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6666 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6667 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6668 ThreadLimitExpr);
6669 }
6670 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6671 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6672 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6673 CGF.getContext(), CS->getCapturedStmt());
6674 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6675 if (Dir->getDirectiveKind() == OMPD_distribute) {
6676 CS = Dir->getInnermostCapturedStmt();
6677 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6678 }
6679 }
6680 return NT;
6681 }
6682 case OMPD_target_teams_distribute:
6683 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6684 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6685 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6686 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6687 ThreadLimitExpr);
6688 }
6689 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6690 UpperBoundOnly, CondVal);
6691 return NT;
6692 case OMPD_target_teams_loop:
6693 case OMPD_target_parallel_loop:
6694 case OMPD_target_parallel:
6695 case OMPD_target_parallel_for:
6696 case OMPD_target_parallel_for_simd:
6697 case OMPD_target_teams_distribute_parallel_for:
6698 case OMPD_target_teams_distribute_parallel_for_simd: {
6699 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6700 const OMPIfClause *IfClause = nullptr;
6701 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6702 if (C->getNameModifier() == OMPD_unknown ||
6703 C->getNameModifier() == OMPD_parallel) {
6704 IfClause = C;
6705 break;
6706 }
6707 }
6708 if (IfClause) {
6709 const Expr *Cond = IfClause->getCondition();
6710 bool Result;
6711 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6712 if (!Result)
6713 return ReturnSequential();
6714 } else {
6715 CodeGenFunction::RunCleanupsScope Scope(CGF);
6716 *CondVal = CGF.EvaluateExprAsBool(Cond);
6717 }
6718 }
6719 }
6720 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6721 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6722 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6723 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6724 ThreadLimitExpr);
6725 }
6726 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6727 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6728 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6729 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6730 return NumThreadsClause->getNumThreads();
6731 }
6732 return NT;
6733 }
6734 case OMPD_target_teams_distribute_simd:
6735 case OMPD_target_simd:
6736 return ReturnSequential();
6737 default:
6738 break;
6739 }
6740 llvm_unreachable("Unsupported directive kind.");
6741}
6742
6743llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6744 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6745 llvm::Value *NumThreadsVal = nullptr;
6746 llvm::Value *CondVal = nullptr;
6747 llvm::Value *ThreadLimitVal = nullptr;
6748 const Expr *ThreadLimitExpr = nullptr;
6749 int32_t UpperBound = -1;
6750
6751 const Expr *NT = getNumThreadsExprForTargetDirective(
6752 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6753 &ThreadLimitExpr);
6754
6755 // Thread limit expressions are used below, emit them.
6756 if (ThreadLimitExpr) {
6757 ThreadLimitVal =
6758 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6759 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6760 /*isSigned=*/false);
6761 }
6762
6763 // Generate the num threads expression.
6764 if (UpperBound == 1) {
6765 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6766 } else if (NT) {
6767 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6768 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6769 /*isSigned=*/false);
6770 } else if (ThreadLimitVal) {
6771 // If we do not have a num threads value but a thread limit, replace the
6772 // former with the latter. The thread limit expression is already handled.
6773 NumThreadsVal = ThreadLimitVal;
6774 ThreadLimitVal = nullptr;
6775 } else {
6776 // Default to "0" which means runtime choice.
6777 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6778 NumThreadsVal = CGF.Builder.getInt32(0);
6779 }
6780
6781 // Handle the if clause. If it is present, the number of threads is
6782 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6783 if (CondVal) {
6785 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6786 CGF.Builder.getInt32(1));
6787 }
6788
6789 // If the thread limit and num threads expressions were present, take the
6790 // minimum.
6791 if (ThreadLimitVal) {
6792 NumThreadsVal = CGF.Builder.CreateSelect(
6793 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6794 ThreadLimitVal, NumThreadsVal);
6795 }
6796
6797 return NumThreadsVal;
6798}
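// Worked example (illustrative): with num_threads(N), thread_limit(L) and an
// if-clause condition 'c' all present, the selects above compute the unsigned
// minimum min(L, c ? N : 1) as the final thread count.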
6799
6800namespace {
6802
6803// Utility to handle information from clauses associated with a given
6804// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6805// It provides a convenient interface to obtain the information and generate
6806// code for that information.
6807class MappableExprsHandler {
6808public:
6809 /// Custom comparator for attach-pointer expressions that compares them by
6810 /// complexity (i.e. their component-depth) first, then by the order in which
6811 /// they were computed by collectAttachPtrExprInfo(), if they are semantically
6812 /// different.
6813 struct AttachPtrExprComparator {
6814 const MappableExprsHandler &Handler;
6815 // Cache of previous equality comparison results.
6816 mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
6817 CachedEqualityComparisons;
6818
6819 AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
6820 AttachPtrExprComparator() = delete;
6821
6822 // Return true iff LHS is "less than" RHS.
6823 bool operator()(const Expr *LHS, const Expr *RHS) const {
6824 if (LHS == RHS)
6825 return false;
6826
6827 // First, compare by complexity (depth)
6828 const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
6829 const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
6830
6831 std::optional<size_t> DepthLHS =
6832 (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
6833 : std::nullopt;
6834 std::optional<size_t> DepthRHS =
6835 (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
6836 : std::nullopt;
6837
6838 // std::nullopt (no attach pointer) has lowest complexity
6839 if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
6840 // Both have same complexity, now check semantic equality
6841 if (areEqual(LHS, RHS))
6842 return false;
6843 // Different semantically, compare by computation order
6844 return wasComputedBefore(LHS, RHS);
6845 }
6846 if (!DepthLHS.has_value())
6847 return true; // LHS has lower complexity
6848 if (!DepthRHS.has_value())
6849 return false; // RHS has lower complexity
6850
6851 // Both have values, compare by depth (lower depth = lower complexity)
6852 if (DepthLHS.value() != DepthRHS.value())
6853 return DepthLHS.value() < DepthRHS.value();
6854
6855 // Same complexity, now check semantic equality
6856 if (areEqual(LHS, RHS))
6857 return false;
6858 // Different semantically, compare by computation order
6859 return wasComputedBefore(LHS, RHS);
6860 }
6861
6862 public:
6863 /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
6864 /// results, if available, otherwise does a recursive semantic comparison.
6865 bool areEqual(const Expr *LHS, const Expr *RHS) const {
6866 // Check cache first for faster lookup
6867 const auto CachedResultIt = CachedEqualityComparisons.find({LHS, RHS});
6868 if (CachedResultIt != CachedEqualityComparisons.end())
6869 return CachedResultIt->second;
6870
6871 bool ComparisonResult = areSemanticallyEqual(LHS, RHS);
6872
6873 // Cache the result for future lookups (both orders since semantic
6874 // equality is commutative)
6875 CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
6876 CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
6877 return ComparisonResult;
6878 }
6879
6880 /// Compare the two attach-ptr expressions by their computation order.
6881 /// Returns true iff LHS was computed before RHS by
6882 /// collectAttachPtrExprInfo().
6883 bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
6884 const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
6885 const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
6886
6887 return OrderLHS < OrderRHS;
6888 }
6889
6890 private:
6891 /// Helper function to compare attach-pointer expressions semantically.
6892 /// This function handles various expression types that can be part of an
6893 /// attach-pointer.
6894 /// TODO: Not urgent, but we should ideally return true when comparing
6895 /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
6896 bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
6897 if (LHS == RHS)
6898 return true;
6899
6900 // If only one is null, they aren't equal
6901 if (!LHS || !RHS)
6902 return false;
6903
6904 ASTContext &Ctx = Handler.CGF.getContext();
6905 // Strip away parentheses and no-op casts to get to the core expression
6906 LHS = LHS->IgnoreParenNoopCasts(Ctx);
6907 RHS = RHS->IgnoreParenNoopCasts(Ctx);
6908
6909 // Direct pointer comparison of the underlying expressions
6910 if (LHS == RHS)
6911 return true;
6912
6913 // Check if the expression classes match
6914 if (LHS->getStmtClass() != RHS->getStmtClass())
6915 return false;
6916
6917 // Handle DeclRefExpr (variable references)
6918 if (const auto *LD = dyn_cast<DeclRefExpr>(LHS)) {
6919 const auto *RD = dyn_cast<DeclRefExpr>(RHS);
6920 if (!RD)
6921 return false;
6922 return LD->getDecl()->getCanonicalDecl() ==
6923 RD->getDecl()->getCanonicalDecl();
6924 }
6925
6926 // Handle ArraySubscriptExpr (array indexing like a[i])
6927 if (const auto *LA = dyn_cast<ArraySubscriptExpr>(LHS)) {
6928 const auto *RA = dyn_cast<ArraySubscriptExpr>(RHS);
6929 if (!RA)
6930 return false;
6931 return areSemanticallyEqual(LA->getBase(), RA->getBase()) &&
6932 areSemanticallyEqual(LA->getIdx(), RA->getIdx());
6933 }
6934
6935 // Handle MemberExpr (member access like s.m or p->m)
6936 if (const auto *LM = dyn_cast<MemberExpr>(LHS)) {
6937 const auto *RM = dyn_cast<MemberExpr>(RHS);
6938 if (!RM)
6939 return false;
6940 if (LM->getMemberDecl()->getCanonicalDecl() !=
6941 RM->getMemberDecl()->getCanonicalDecl())
6942 return false;
6943 return areSemanticallyEqual(LM->getBase(), RM->getBase());
6944 }
6945
6946 // Handle UnaryOperator (unary operations like *p, &x, etc.)
6947 if (const auto *LU = dyn_cast<UnaryOperator>(LHS)) {
6948 const auto *RU = dyn_cast<UnaryOperator>(RHS);
6949 if (!RU)
6950 return false;
6951 if (LU->getOpcode() != RU->getOpcode())
6952 return false;
6953 return areSemanticallyEqual(LU->getSubExpr(), RU->getSubExpr());
6954 }
6955
6956 // Handle BinaryOperator (binary operations like p + offset)
6957 if (const auto *LB = dyn_cast<BinaryOperator>(LHS)) {
6958 const auto *RB = dyn_cast<BinaryOperator>(RHS);
6959 if (!RB)
6960 return false;
6961 if (LB->getOpcode() != RB->getOpcode())
6962 return false;
6963 return areSemanticallyEqual(LB->getLHS(), RB->getLHS()) &&
6964 areSemanticallyEqual(LB->getRHS(), RB->getRHS());
6965 }
6966
6967 // Handle ArraySectionExpr (array sections like a[0:1])
6968 // Attach pointers should not contain array-sections, but currently we
6969 // don't emit an error.
6970 if (const auto *LAS = dyn_cast<ArraySectionExpr>(LHS)) {
6971 const auto *RAS = dyn_cast<ArraySectionExpr>(RHS);
6972 if (!RAS)
6973 return false;
6974 return areSemanticallyEqual(LAS->getBase(), RAS->getBase()) &&
6975 areSemanticallyEqual(LAS->getLowerBound(),
6976 RAS->getLowerBound()) &&
6977 areSemanticallyEqual(LAS->getLength(), RAS->getLength());
6978 }
6979
6980 // Handle CastExpr (explicit casts)
6981 if (const auto *LC = dyn_cast<CastExpr>(LHS)) {
6982 const auto *RC = dyn_cast<CastExpr>(RHS);
6983 if (!RC)
6984 return false;
6985 if (LC->getCastKind() != RC->getCastKind())
6986 return false;
6987 return areSemanticallyEqual(LC->getSubExpr(), RC->getSubExpr());
6988 }
6989
6990 // Handle CXXThisExpr (this pointer)
6991 if (isa<CXXThisExpr>(LHS) && isa<CXXThisExpr>(RHS))
6992 return true;
6993
6994 // Handle IntegerLiteral (integer constants)
6995 if (const auto *LI = dyn_cast<IntegerLiteral>(LHS)) {
6996 const auto *RI = dyn_cast<IntegerLiteral>(RHS);
6997 if (!RI)
6998 return false;
6999 return LI->getValue() == RI->getValue();
7000 }
7001
7002 // Handle CharacterLiteral (character constants)
7003 if (const auto *LC = dyn_cast<CharacterLiteral>(LHS)) {
7004 const auto *RC = dyn_cast<CharacterLiteral>(RHS);
7005 if (!RC)
7006 return false;
7007 return LC->getValue() == RC->getValue();
7008 }
7009
7010 // Handle FloatingLiteral (floating point constants)
7011 if (const auto *LF = dyn_cast<FloatingLiteral>(LHS)) {
7012 const auto *RF = dyn_cast<FloatingLiteral>(RHS);
7013 if (!RF)
7014 return false;
7015 // Use bitwise comparison for floating point literals
7016 return LF->getValue().bitwiseIsEqual(RF->getValue());
7017 }
7018
7019 // Handle StringLiteral (string constants)
7020 if (const auto *LS = dyn_cast<StringLiteral>(LHS)) {
7021 const auto *RS = dyn_cast<StringLiteral>(RHS);
7022 if (!RS)
7023 return false;
7024 return LS->getString() == RS->getString();
7025 }
7026
7027 // Handle CXXNullPtrLiteralExpr (nullptr)
7029 return true;
7030
7031 // Handle CXXBoolLiteralExpr (true/false)
7032 if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(LHS)) {
7033 const auto *RB = dyn_cast<CXXBoolLiteralExpr>(RHS);
7034 if (!RB)
7035 return false;
7036 return LB->getValue() == RB->getValue();
7037 }
7038
7039 // Fallback for other forms - use the existing comparison method
7040 return Expr::isSameComparisonOperand(LHS, RHS);
7041 }
7042 };
7043
7044 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7045 static unsigned getFlagMemberOffset() {
7046 unsigned Offset = 0;
7047 for (uint64_t Remain =
7048 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7049 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7050 !(Remain & 1); Remain = Remain >> 1)
7051 Offset++;
7052 return Offset;
7053 }
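// Example: OMP_MAP_MEMBER_OF is 0xFFFF000000000000, so the loop above counts
// its 48 trailing zero bits and returns an offset of 48.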
7054
7055 /// Class that holds debugging information for a data mapping to be passed to
7056 /// the runtime library.
7057 class MappingExprInfo {
7058 /// The variable declaration used for the data mapping.
7059 const ValueDecl *MapDecl = nullptr;
7060 /// The original expression used in the map clause, or null if there is
7061 /// none.
7062 const Expr *MapExpr = nullptr;
7063
7064 public:
7065 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7066 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7067
7068 const ValueDecl *getMapDecl() const { return MapDecl; }
7069 const Expr *getMapExpr() const { return MapExpr; }
7070 };
7071
7072 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7073 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7074 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7075 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7076 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7077 using MapNonContiguousArrayTy =
7078 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7079 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7080 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7081 using MapData =
7082 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7083 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7084 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7085 using MapDataArrayTy = SmallVector<MapData, 4>;
7086
7087 /// This structure contains combined information generated for mappable
7088 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7089 /// mappers, and non-contiguous information.
7090 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7091 MapExprsArrayTy Exprs;
7092 MapValueDeclsArrayTy Mappers;
7093 MapValueDeclsArrayTy DevicePtrDecls;
7094
7095 /// Append arrays in \a CurInfo.
7096 void append(MapCombinedInfoTy &CurInfo) {
7097 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7098 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
7099 CurInfo.DevicePtrDecls.end());
7100 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7101 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7102 }
7103 };
7104
7105 /// Map between a struct and its lowest & highest elements which have been
7106 /// mapped.
7107 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7108 /// HE(FieldIndex, Pointer)}
7109 struct StructRangeInfoTy {
7110 MapCombinedInfoTy PreliminaryMapData;
7111 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7112 0, Address::invalid()};
7113 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7114 0, Address::invalid()};
7115 Address Base = Address::invalid();
7116 Address LB = Address::invalid();
7117 bool IsArraySection = false;
7118 bool HasCompleteRecord = false;
7119 };
7120
7121 /// A struct to store the attach pointer and pointee information, to be used
7122 /// when emitting an attach entry.
7123 struct AttachInfoTy {
7124 Address AttachPtrAddr = Address::invalid();
7125 Address AttachPteeAddr = Address::invalid();
7126 const ValueDecl *AttachPtrDecl = nullptr;
7127 const Expr *AttachMapExpr = nullptr;
7128
7129 bool isValid() const {
7130 return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
7131 }
7132 };
7133
7134 /// Check if there's any component list where the attach pointer expression
7135 /// matches the given captured variable.
7136 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7137 for (const auto &AttachEntry : AttachPtrExprMap) {
7138 if (AttachEntry.second) {
7139 // Check if the attach pointer expression is a DeclRefExpr that
7140 // references the captured variable
7141 if (const auto *DRE = dyn_cast<DeclRefExpr>(AttachEntry.second))
7142 if (DRE->getDecl() == VD)
7143 return true;
7144 }
7145 }
7146 return false;
7147 }
7148
7149 /// Get the previously-cached attach pointer for a component list, if any.
7150 const Expr *getAttachPtrExpr(
7151 OMPClauseMappableExprCommon::MappableExprComponentListRef Components)
7152 const {
7153 const auto It = AttachPtrExprMap.find(Components);
7154 if (It != AttachPtrExprMap.end())
7155 return It->second;
7156
7157 return nullptr;
7158 }
7159
7160private:
7161 /// Information about a single component list from a mappable clause.
7162 struct MapInfo {
7163 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7164 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7165 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7166 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7167 bool ReturnDevicePointer = false;
7168 bool IsImplicit = false;
7169 const ValueDecl *Mapper = nullptr;
7170 const Expr *VarRef = nullptr;
7171 bool ForDeviceAddr = false;
7172
7173 MapInfo() = default;
7174 MapInfo(
7175 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7176 OpenMPMapClauseKind MapType,
7177 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7178 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7179 bool ReturnDevicePointer, bool IsImplicit,
7180 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7181 bool ForDeviceAddr = false)
7182 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7183 MotionModifiers(MotionModifiers),
7184 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7185 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7186 };
7187
7188 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7189 /// member and there is no map information about it, then emission of that
7190 /// entry is deferred until the whole struct has been processed.
7191 struct DeferredDevicePtrEntryTy {
7192 const Expr *IE = nullptr;
7193 const ValueDecl *VD = nullptr;
7194 bool ForDeviceAddr = false;
7195
7196 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7197 bool ForDeviceAddr)
7198 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7199 };
7200
7201 /// The target directive from where the mappable clauses were extracted. It
7202 /// is either an executable directive or a user-defined mapper directive.
7203 llvm::PointerUnion<const OMPExecutableDirective *,
7204 const OMPDeclareMapperDecl *>
7205 CurDir;
7206
7207 /// Function the directive is being generated for.
7208 CodeGenFunction &CGF;
7209
7210 /// Set of all first private variables in the current directive.
7211 /// bool data is set to true if the variable is implicitly marked as
7212 /// firstprivate, false otherwise.
7213 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7214
7215 /// Set of defaultmap clause kinds that use firstprivate behavior.
7216 llvm::SmallSet<OpenMPDefaultmapClauseKind, 4> DefaultmapFirstprivateKinds;
7217
7218 /// Map between device pointer declarations and their expression components.
7219 /// The key value for declarations in 'this' is null.
7220 llvm::DenseMap<
7221 const ValueDecl *,
7222 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7223 DevPointersMap;
7224
7225 /// Map between device addr declarations and their expression components.
7226 /// The key value for declarations in 'this' is null.
7227 llvm::DenseMap<
7228 const ValueDecl *,
7229 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7230 HasDevAddrsMap;
7231
7232 /// Map between lambda declarations and their map type.
7233 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7234
7235 /// Map from component lists to their attach pointer expressions.
7236 llvm::DenseMap<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7237 const Expr *>
7238 AttachPtrExprMap;
7239
7240 /// Map from attach pointer expressions to their component depth. The
7241 /// component-depth of the `nullptr` key (i.e. no attach-ptr) is
7242 /// `std::nullopt`. This can be used to order attach-ptr expressions with
7243 /// increasing/decreasing depth.
7244 /// TODO: Not urgent, but we should ideally use the number of pointer
7245 /// dereferences in an expr as an indicator of its complexity, instead of the
7246 /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
7247 /// `*(p + 5 + 5)` together.
7248 llvm::DenseMap<const Expr *, std::optional<size_t>>
7249 AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};
7250
7251 /// Map from attach pointer expressions to the order in which they were
7252 /// computed by collectAttachPtrExprInfo().
7253 llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
7254 {nullptr, 0}};
7255
7256 /// An instance of attach-ptr-expr comparator that can be used throughout the
7257 /// lifetime of this handler.
7258 AttachPtrExprComparator AttachPtrComparator;
7259
7260 llvm::Value *getExprTypeSize(const Expr *E) const {
7261 QualType ExprTy = E->getType().getCanonicalType();
7262
7263 // Calculate the size for array shaping expression.
7264 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7265 llvm::Value *Size =
7266 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7267 for (const Expr *SE : OAE->getDimensions()) {
7268 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7269 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7270 CGF.getContext().getSizeType(),
7271 SE->getExprLoc());
7272 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7273 }
7274 return Size;
7275 }
7276
7277 // Reference types are ignored for mapping purposes.
7278 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7279 ExprTy = RefTy->getPointeeType().getCanonicalType();
7280
7281 // Given that an array section is considered a built-in type, we need to
7282 // do the calculation based on the length of the section instead of relying
7283 // on CGF.getTypeSize(E->getType()).
7284 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
7285 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
7286 OAE->getBase()->IgnoreParenImpCasts())
7287 .getCanonicalType();
7288
7289 // If there is no length associated with the expression and the lower
7290 // bound is not specified either, that means we are using the whole length
7291 // of the base.
7292 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7293 !OAE->getLowerBound())
7294 return CGF.getTypeSize(BaseTy);
7295
7296 llvm::Value *ElemSize;
7297 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7298 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7299 } else {
7300 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7301 assert(ATy && "Expecting array type if not a pointer type.");
7302 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7303 }
7304
7305 // If we don't have a length at this point, that is because we have an
7306 // array section with a single element.
7307 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7308 return ElemSize;
7309
7310 if (const Expr *LenExpr = OAE->getLength()) {
7311 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7312 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7313 CGF.getContext().getSizeType(),
7314 LenExpr->getExprLoc());
7315 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7316 }
7317 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7318 OAE->getLowerBound() && "expected array_section[lb:].");
7319 // Size = sizetype - lb * elemtype;
7320 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7321 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7322 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7323 CGF.getContext().getSizeType(),
7324 OAE->getLowerBound()->getExprLoc());
7325 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7326 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7327 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7328 LengthVal = CGF.Builder.CreateSelect(
7329 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7330 return LengthVal;
7331 }
7332 return CGF.getTypeSize(ExprTy);
7333 }
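// Worked example (illustrative): for 'double a[100]' mapped as 'a[20:]', the
// lower-bound branch above computes sizeof(a) - 20 * sizeof(double), i.e.
// 800 - 160 = 640 bytes, with the select clamping the result at zero.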
7334
7335 /// Return the corresponding bits for a given map clause modifier. Add
7336 /// a flag marking the map as a pointer if requested. Add a flag marking the
7337 /// map as the first one of a series of maps that relate to the same map
7338 /// expression.
7339 OpenMPOffloadMappingFlags getMapTypeBits(
7340 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7341 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7342 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7343 OpenMPOffloadMappingFlags Bits =
7344 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7345 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7346 switch (MapType) {
7347 case OMPC_MAP_alloc:
7348 case OMPC_MAP_release:
7349 // alloc and release is the default behavior in the runtime library, i.e.
7350 // if we don't pass any bits alloc/release that is what the runtime is
7351 // going to do. Therefore, we don't need to signal anything for these two
7352 // type modifiers.
7353 break;
7354 case OMPC_MAP_to:
7355 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7356 break;
7357 case OMPC_MAP_from:
7358 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7359 break;
7360 case OMPC_MAP_tofrom:
7361 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7362 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7363 break;
7364 case OMPC_MAP_delete:
7365 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7366 break;
7367 case OMPC_MAP_unknown:
7368 llvm_unreachable("Unexpected map type!");
7369 }
7370 if (AddPtrFlag)
7371 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7372 if (AddIsTargetParamFlag)
7373 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7374 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7375 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7376 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7377 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7378 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7379 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7380 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7381 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7382 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7383 if (IsNonContiguous)
7384 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7385 return Bits;
7386 }
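// Example (derived from the switch above): 'map(always, tofrom: x)' yields
// OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS, plus OMP_MAP_TARGET_PARAM when
// the entry is passed as a kernel argument.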
7387
7388 /// Return true if the provided expression is a final array section. A
7389 /// final array section is one whose length can't be proven to be one.
7390 bool isFinalArraySectionExpression(const Expr *E) const {
7391 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7392
7393 // It is not an array section and therefore not a unity-size one.
7394 if (!OASE)
7395 return false;
7396
7397 // An array section with no colon always refers to a single element.
7398 if (OASE->getColonLocFirst().isInvalid())
7399 return false;
7400
7401 const Expr *Length = OASE->getLength();
7402
7403 // If we don't have a length, we have to check whether the array has size 1
7404 // for this dimension. Also, we should always expect a length if the
7405 // base type is a pointer.
7406 if (!Length) {
7407 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7408 OASE->getBase()->IgnoreParenImpCasts())
7409 .getCanonicalType();
7410 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7411 return ATy->getSExtSize() != 1;
7412 // If we don't have a constant dimension length, we have to consider
7413 // the current section as having any size, so it is not necessarily
7414 // unitary. If it happens to be unity size, that's the user's fault.
7415 return true;
7416 }
7417
7418 // Check if the length evaluates to 1.
7419 Expr::EvalResult Result;
7420 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7421 return true; // Can have a size greater than 1.
7422
7423 llvm::APSInt ConstLength = Result.Val.getInt();
7424 return ConstLength.getSExtValue() != 1;
7425 }
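// Illustrative sketch (hypothetical declarations): given `int a[10];` and
// a non-constant `int n;`:
//   a[2:1] -> length provably 1, so not a final array section
//   a[2:n] -> length unknown at compile time, so a final array section
//   a[2:]  -> length absent and dimension size 10 != 1, so final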
7426
7427 /// A helper class to copy structures with overlapped elements, i.e. those
7428 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7429 /// are not explicitly copied have mapping nodes synthesized for them,
7430 /// taking care to avoid generating zero-sized copies.
7431 class CopyOverlappedEntryGaps {
7432 CodeGenFunction &CGF;
7433 MapCombinedInfoTy &CombinedInfo;
7434 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7435 const ValueDecl *MapDecl = nullptr;
7436 const Expr *MapExpr = nullptr;
7437 Address BP = Address::invalid();
7438 bool IsNonContiguous = false;
7439 uint64_t DimSize = 0;
7440 // These elements track the position as the struct is iterated over
7441 // (in order of increasing element address).
7442 const RecordDecl *LastParent = nullptr;
7443 uint64_t Cursor = 0;
7444 unsigned LastIndex = -1u;
7445 Address LB = Address::invalid();
7446
7447 public:
7448 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7449 MapCombinedInfoTy &CombinedInfo,
7450 OpenMPOffloadMappingFlags Flags,
7451 const ValueDecl *MapDecl, const Expr *MapExpr,
7452 Address BP, Address LB, bool IsNonContiguous,
7453 uint64_t DimSize)
7454 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7455 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7456 DimSize(DimSize), LB(LB) {}
7457
7458 void processField(
7459 const OMPClauseMappableExprCommon::MappableComponent &MC,
7460 const FieldDecl *FD,
7461 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7462 EmitMemberExprBase) {
7463 const RecordDecl *RD = FD->getParent();
7464 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7465 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7466 uint64_t FieldSize =
7467 CGF.getContext().getTypeSize(FD->getType().getCanonicalType());
7468 Address ComponentLB = Address::invalid();
7469
7470 if (FD->getType()->isLValueReferenceType()) {
7471 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7472 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7473 ComponentLB =
7474 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7475 } else {
7476 ComponentLB =
7477 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
7478 }
7479
7480 if (!LastParent)
7481 LastParent = RD;
7482 if (FD->getParent() == LastParent) {
7483 if (FD->getFieldIndex() != LastIndex + 1)
7484 copyUntilField(FD, ComponentLB);
7485 } else {
7486 LastParent = FD->getParent();
7487 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7488 copyUntilField(FD, ComponentLB);
7489 }
7490 Cursor = FieldOffset + FieldSize;
7491 LastIndex = FD->getFieldIndex();
7492 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7493 }
7494
7495 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7496 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7497 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7498 llvm::Value *Size =
7499 CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
7500 copySizedChunk(LBPtr, Size);
7501 }
7502
7503 void copyUntilEnd(Address HB) {
7504 if (LastParent) {
7505 const ASTRecordLayout &RL =
7506 CGF.getContext().getASTRecordLayout(LastParent);
7507 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7508 return;
7509 }
7510 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7511 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7512 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7513 LBPtr);
7514 copySizedChunk(LBPtr, Size);
7515 }
7516
7517 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7518 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7519 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7520 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7521 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7522 CombinedInfo.Pointers.push_back(Base);
7523 CombinedInfo.Sizes.push_back(
7524 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7525 CombinedInfo.Types.push_back(Flags);
7526 CombinedInfo.Mappers.push_back(nullptr);
7527 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7528 }
7529 };
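// Illustrative sketch (hypothetical types): for
//   struct T { int a; int b; int c; } t;
//   #pragma omp target map(tofrom: t) map(from: t.b)
// t.b is an overlapped element, so the helper above synthesizes copies for
// the gaps around it (roughly [&t.a, &t.b) and from one past t.b to the
// end of t) instead of one copy of the whole struct.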
7530
7531 /// Generate the base pointers, section pointers, sizes, map type bits, and
7532 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7533 /// map type, map or motion modifiers, and expression components.
7534 /// \a IsFirstComponent should be set to true if the provided set of
7535 /// components is the first associated with a capture.
7536 void generateInfoForComponentList(
7537 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7538 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7539 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7540 MapCombinedInfoTy &CombinedInfo,
7541 MapCombinedInfoTy &StructBaseCombinedInfo,
7542 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7543 bool IsImplicit, bool GenerateAllInfoForClauses,
7544 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7545 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7546 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7547 OverlappedElements = {},
7548 bool AreBothBasePtrAndPteeMapped = false) const {
7549 // The following summarizes what has to be generated for each map and the
7550 // types below. The generated information is expressed in this order:
7551 // base pointer, section pointer, size, flags
7552 // (to add to the ones that come from the map type and modifier).
7553 //
7554 // double d;
7555 // int i[100];
7556 // float *p;
7557 // int **a = &i;
7558 //
7559 // struct S1 {
7560 // int i;
7561 // float f[50];
7562 // }
7563 // struct S2 {
7564 // int i;
7565 // float f[50];
7566 // S1 s;
7567 // double *p;
7568 // struct S2 *ps;
7569 // int &ref;
7570 // }
7571 // S2 s;
7572 // S2 *ps;
7573 //
7574 // map(d)
7575 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7576 //
7577 // map(i)
7578 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7579 //
7580 // map(i[1:23])
7581 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7582 //
7583 // map(p)
7584 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7585 //
7586 // map(p[1:24])
7587 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7588 // in unified shared memory mode or for local pointers
7589 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7590 //
7591 // map((*a)[0:3])
7592 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7593 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7594 //
7595 // map(**a)
7596 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7597 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7598 //
7599 // map(s)
7600 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7601 //
7602 // map(s.i)
7603 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7604 //
7605 // map(s.s.f)
7606 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7607 //
7608 // map(s.p)
7609 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7610 //
7611 // map(to: s.p[:22])
7612 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7613 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7614 // &(s.p), &(s.p[0]), 22*sizeof(double),
7615 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7616 // (*) alloc space for struct members, only this is a target parameter
7617 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7618 // optimizes this entry out, same in the examples below)
7619 // (***) map the pointee (map: to)
7620 //
7621 // map(to: s.ref)
7622 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7623 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7624 // (*) alloc space for struct members, only this is a target parameter
7625 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7626 // optimizes this entry out, same in the examples below)
7627 // (***) map the pointee (map: to)
7628 //
7629 // map(s.ps)
7630 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7631 //
7632 // map(from: s.ps->s.i)
7633 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7634 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7635 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7636 //
7637 // map(to: s.ps->ps)
7638 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7639 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7640 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7641 //
7642 // map(s.ps->ps->ps)
7643 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7644 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7645 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7646 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7647 //
7648 // map(to: s.ps->ps->s.f[:22])
7649 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7650 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7651 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7652 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7653 //
7654 // map(ps)
7655 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7656 //
7657 // map(ps->i)
7658 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7659 //
7660 // map(ps->s.f)
7661 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7662 //
7663 // map(from: ps->p)
7664 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7665 //
7666 // map(to: ps->p[:22])
7667 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7668 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7669 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7670 //
7671 // map(ps->ps)
7672 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7673 //
7674 // map(from: ps->ps->s.i)
7675 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7676 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7677 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7678 //
7679 // map(from: ps->ps->ps)
7680 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7681 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7682 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7683 //
7684 // map(ps->ps->ps->ps)
7685 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7686 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7687 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7688 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7689 //
7690 // map(to: ps->ps->ps->s.f[:22])
7691 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7692 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7693 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7694 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7695 //
7696 // map(to: s.f[:22]) map(from: s.p[:33])
7697 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7698 // sizeof(double*) (**), TARGET_PARAM
7699 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7700 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7701 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7702 // (**) allocate contiguous space needed to fit all mapped members even if
7703 // we allocate space for members not mapped (in this example,
7704 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7705 // them as well because they fall between &s.f[0] and &s.p)
7706 //
7707 // map(from: s.f[:22]) map(to: ps->p[:33])
7708 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7709 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7710 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7711 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7712 // (*) the struct this entry pertains to is the 2nd element in the list of
7713 // arguments, hence MEMBER_OF(2)
7714 //
7715 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7716 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7717 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7718 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7719 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7720 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7721 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7722 // (*) the struct this entry pertains to is the 4th element in the list
7723 // of arguments, hence MEMBER_OF(4)
7724 //
7725 // map(p, p[:100])
7726 // ===> map(p[:100])
7727 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7728
7729 // Track if the map information being generated is the first for a capture.
7730 bool IsCaptureFirstInfo = IsFirstComponentList;
7731 // When the variable is on a declare target link or in a to clause with
7732 // unified memory, a reference is needed to hold the host/device address
7733 // of the variable.
7734 bool RequiresReference = false;
7735
7736 // Scan the components from the base to the complete expression.
7737 auto CI = Components.rbegin();
7738 auto CE = Components.rend();
7739 auto I = CI;
7740
7741 // Track if the map information being generated is the first for a list of
7742 // components.
7743 bool IsExpressionFirstInfo = true;
7744 bool FirstPointerInComplexData = false;
7745 Address BP = Address::invalid();
7746 const Expr *AssocExpr = I->getAssociatedExpression();
7747 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7748 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7749 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7750
7751 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7752 return;
7753 if (isa<MemberExpr>(AssocExpr)) {
7754 // The base is the 'this' pointer. The content of the pointer is going
7755 // to be the base of the field being mapped.
7756 BP = CGF.LoadCXXThisAddress();
7757 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7758 (OASE &&
7759 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7760 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7761 } else if (OAShE &&
7762 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7763 BP = Address(
7764 CGF.EmitScalarExpr(OAShE->getBase()),
7765 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7766 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7767 } else {
7768 // The base is the reference to the variable.
7769 // BP = &Var.
7770 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7771 if (const auto *VD =
7772 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7773 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7774 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7775 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7776 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7777 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7778 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7779 RequiresReference = true;
7780 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7781 }
7782 }
7783 }
7784
7785 // If the variable is a pointer and is being dereferenced (i.e. is not
7786 // the last component), the base has to be the pointer itself, not its
7787 // reference. References are ignored for mapping purposes.
7788 QualType Ty =
7789 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7790 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7791 // No need to generate individual map information for the pointer; it
7792 // can be associated with the combined storage if shared memory mode is
7793 // active or the base declaration is not a global variable.
7794 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7795 if (!AreBothBasePtrAndPteeMapped &&
7796 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7797 !VD || VD->hasLocalStorage()))
7798 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7799 else
7800 FirstPointerInComplexData = true;
7801 ++I;
7802 }
7803 }
7804
7805 // Track whether a component of the list should be marked as MEMBER_OF some
7806 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7807 // in a component list should be marked as MEMBER_OF; all subsequent entries
7808 // do not belong to the base struct. E.g.
7809 // struct S2 s;
7810 // s.ps->ps->ps->f[:]
7811 // (1) (2) (3) (4)
7812 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7813 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7814 // is the pointee of ps(2), which is not a member of struct s, so it should not
7815 // be marked as such (it is still PTR_AND_OBJ).
7816 // The variable is initialized to false so that PTR_AND_OBJ entries which
7817 // are not struct members are not considered (e.g. array of pointers to
7818 // data).
7819 bool ShouldBeMemberOf = false;
7820
7821 // Variable keeping track of whether or not we have encountered a component
7822 // in the component list which is a member expression. Useful when we have a
7823 // pointer or a final array section, in which case it is the previous
7824 // component in the list which tells us whether we have a member expression.
7825 // E.g. X.f[:]
7826 // While processing the final array section "[:]" it is "f" which tells us
7827 // whether we are dealing with a member of a declared struct.
7828 const MemberExpr *EncounteredME = nullptr;
7829
7830 // Track the total number of dimensions. Start from one for the dummy
7831 // dimension.
7832 uint64_t DimSize = 1;
7833
7834 // Detects non-contiguous updates due to strided accesses.
7835 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
7836 // correctly when generating information to be passed to the runtime. The
7837 // flag is set to true if any array section has a constant stride not
7838 // equal to 1 (sections whose stride is not a constant expression are
7839 // left as contiguous here).
7840 bool IsNonContiguous =
7841 CombinedInfo.NonContigInfo.IsNonContiguous ||
7842 any_of(Components, [&](const auto &Component) {
7843 const auto *OASE =
7844 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
7845 if (!OASE)
7846 return false;
7847
7848 const Expr *StrideExpr = OASE->getStride();
7849 if (!StrideExpr)
7850 return false;
7851
7852 const auto Constant =
7853 StrideExpr->getIntegerConstantExpr(CGF.getContext());
7854 if (!Constant)
7855 return false;
7856
7857 return !Constant->isOne();
7858 });
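// Illustrative sketch (hypothetical declarations): for `int a[100];`, a
// `target update to(a[0:50:2])` has a constant stride of 2, so the check
// above marks the transfer non-contiguous; `a[0:50]` and `a[0:50:1]` stay
// contiguous.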
7859
7860 bool IsPrevMemberReference = false;
7861
7862 bool IsPartialMapped =
7863 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7864
7865 // We need to check if we will encounter any member expressions (MEs). If
7866 // we do not encounter any ME it means we will be mapping the whole struct.
7867 // In that case we need to skip adding an entry for the struct to the
7868 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7869 // list only when generating all info for clauses.
7870 bool IsMappingWholeStruct = true;
7871 if (!GenerateAllInfoForClauses) {
7872 IsMappingWholeStruct = false;
7873 } else {
7874 for (auto TempI = I; TempI != CE; ++TempI) {
7875 const MemberExpr *PossibleME =
7876 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7877 if (PossibleME) {
7878 IsMappingWholeStruct = false;
7879 break;
7880 }
7881 }
7882 }
7883
7884 for (; I != CE; ++I) {
7885 // If the current component is member of a struct (parent struct) mark it.
7886 if (!EncounteredME) {
7887 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7888 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7889 // as MEMBER_OF the parent struct.
7890 if (EncounteredME) {
7891 ShouldBeMemberOf = true;
7892 // Do not emit as complex pointer if this is actually not array-like
7893 // expression.
7894 if (FirstPointerInComplexData) {
7895 QualType Ty = std::prev(I)
7896 ->getAssociatedDeclaration()
7897 ->getType()
7898 .getNonReferenceType();
7899 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7900 FirstPointerInComplexData = false;
7901 }
7902 }
7903 }
7904
7905 auto Next = std::next(I);
7906
7907 // We need to generate the addresses and sizes if this is the last
7908 // component, if the component is a pointer or if it is an array section
7909 // whose length can't be proved to be one. If this is a pointer, it
7910 // becomes the base address for the following components.
7911
7912 // A final array section is one whose length can't be proved to be one.
7913 // If the map item is non-contiguous then we don't treat any array section
7914 // as final array section.
7915 bool IsFinalArraySection =
7916 !IsNonContiguous &&
7917 isFinalArraySectionExpression(I->getAssociatedExpression());
7918
7919 // If we have a declaration for the mapping use that, otherwise use
7920 // the base declaration of the map clause.
7921 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7922 ? I->getAssociatedDeclaration()
7923 : BaseDecl;
7924 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7925 : MapExpr;
7926
7927 // Get information on whether the element is a pointer. Have to do a
7928 // special treatment for array sections given that they are built-in
7929 // types.
7930 const auto *OASE =
7931 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7932 const auto *OAShE =
7933 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7934 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7935 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7936 bool IsPointer =
7937 OAShE ||
7938 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7939 .getCanonicalType()
7940 ->isAnyPointerType()) ||
7941 I->getAssociatedExpression()->getType()->isAnyPointerType();
7942 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7943 MapDecl &&
7944 MapDecl->getType()->isLValueReferenceType();
7945 bool IsNonDerefPointer = IsPointer &&
7946 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7947 !IsNonContiguous;
7948
7949 if (OASE)
7950 ++DimSize;
7951
7952 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7953 IsFinalArraySection) {
7954 // If this is not the last component, we expect the pointer to be
7955 // associated with an array expression or member expression.
7956 assert((Next == CE ||
7957 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7958 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7959 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7960 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7961 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7962 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7963 "Unexpected expression");
7964
7965 Address LB = Address::invalid();
7966 Address LowestElem = Address::invalid();
7967 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7968 const MemberExpr *E) {
7969 const Expr *BaseExpr = E->getBase();
7970 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7971 // scalar.
7972 LValue BaseLV;
7973 if (E->isArrow()) {
7974 LValueBaseInfo BaseInfo;
7975 TBAAAccessInfo TBAAInfo;
7976 Address Addr =
7977 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7978 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7979 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7980 } else {
7981 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7982 }
7983 return BaseLV;
7984 };
7985 if (OAShE) {
7986 LowestElem = LB =
7987 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7988 CGF.ConvertTypeForMem(
7989 OAShE->getBase()->getType()->getPointeeType()),
7990 CGF.getContext().getTypeAlignInChars(
7991 OAShE->getBase()->getType()));
7992 } else if (IsMemberReference) {
7993 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7994 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7995 LowestElem = CGF.EmitLValueForFieldInitialization(
7996 BaseLVal, cast<FieldDecl>(MapDecl))
7997 .getAddress();
7998 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7999 .getAddress();
8000 } else {
8001 LowestElem = LB =
8002 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8003 .getAddress();
8004 }
8005
8006 // If this component is a pointer inside the base struct then we don't
8007 // need to create any entry for it - it will be combined with the object
8008 // it is pointing to into a single PTR_AND_OBJ entry.
8009 bool IsMemberPointerOrAddr =
8010 EncounteredME &&
8011 (((IsPointer || ForDeviceAddr) &&
8012 I->getAssociatedExpression() == EncounteredME) ||
8013 (IsPrevMemberReference && !IsPointer) ||
8014 (IsMemberReference && Next != CE &&
8015 !Next->getAssociatedExpression()->getType()->isPointerType()));
8016 if (!OverlappedElements.empty() && Next == CE) {
8017 // Handle base element with the info for overlapped elements.
8018 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8019 assert(!IsPointer &&
8020 "Unexpected base element with the pointer type.");
8021 // Mark the whole struct as the struct that requires allocation on the
8022 // device.
8023 PartialStruct.LowestElem = {0, LowestElem};
8024 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8025 I->getAssociatedExpression()->getType());
8026 Address HB = CGF.Builder.CreateConstGEP(
8027 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8028 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8029 TypeSize.getQuantity() - 1);
8030 PartialStruct.HighestElem = {
8031 std::numeric_limits<decltype(
8032 PartialStruct.HighestElem.first)>::max(),
8033 HB};
8034 PartialStruct.Base = BP;
8035 PartialStruct.LB = LB;
8036 assert(
8037 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8038 "Overlapped elements must be used only once for the variable.");
8039 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8040 // Emit data for non-overlapped data.
8041 OpenMPOffloadMappingFlags Flags =
8042 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8043 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8044 /*AddPtrFlag=*/false,
8045 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8046 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8047 MapExpr, BP, LB, IsNonContiguous,
8048 DimSize);
8049 // Do bitcopy of all non-overlapped structure elements.
8050 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8051 Component : OverlappedElements) {
8052 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8053 Component) {
8054 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8055 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
8056 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8057 }
8058 }
8059 }
8060 }
8061 CopyGaps.copyUntilEnd(HB);
8062 break;
8063 }
8064 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8065 // Skip adding an entry in the CurInfo of this combined entry if the
8066 // whole struct is currently being mapped. The struct needs to be added
8067 // in the first position before any data internal to the struct is being
8068 // mapped.
8069 // Skip adding an entry in the CurInfo of this combined entry if the
8070 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
8071 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8072 (Next == CE && MapType != OMPC_MAP_unknown)) {
8073 if (!IsMappingWholeStruct) {
8074 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8075 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
8076 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8077 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8078 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8079 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8080 Size, CGF.Int64Ty, /*isSigned=*/true));
8081 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8082 : 1);
8083 } else {
8084 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8085 StructBaseCombinedInfo.BasePointers.push_back(
8086 BP.emitRawPointer(CGF));
8087 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
8088 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8089 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8090 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8091 Size, CGF.Int64Ty, /*isSigned=*/true));
8092 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8093 IsNonContiguous ? DimSize : 1);
8094 }
8095
8096 // If Mapper is valid, the last component inherits the mapper.
8097 bool HasMapper = Mapper && Next == CE;
8098 if (!IsMappingWholeStruct)
8099 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8100 else
8101 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
8102 : nullptr);
8103
8104 // We need to add a pointer flag for each map that comes from the
8105 // same expression except for the first one. We also need to signal
8106 // this map is the first one that relates with the current capture
8107 // (there is a set of entries for each capture).
8108 OpenMPOffloadMappingFlags Flags =
8109 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8110 !IsExpressionFirstInfo || RequiresReference ||
8111 FirstPointerInComplexData || IsMemberReference,
8112 AreBothBasePtrAndPteeMapped ||
8113 (IsCaptureFirstInfo && !RequiresReference),
8114 IsNonContiguous);
8115
8116 if (!IsExpressionFirstInfo || IsMemberReference) {
8117 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8118 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8119 if (IsPointer || (IsMemberReference && Next != CE))
8120 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8121 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8122 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8123 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8124 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8125
8126 if (ShouldBeMemberOf) {
8127 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8128 // should be later updated with the correct value of MEMBER_OF.
8129 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8130 // From now on, all subsequent PTR_AND_OBJ entries should not be
8131 // marked as MEMBER_OF.
8132 ShouldBeMemberOf = false;
8133 }
8134 }
8135
8136 if (!IsMappingWholeStruct)
8137 CombinedInfo.Types.push_back(Flags);
8138 else
8139 StructBaseCombinedInfo.Types.push_back(Flags);
8140 }
8141
8142 // If we have encountered a member expression so far, keep track of the
8143 // mapped member. If the parent is "*this", then the value declaration
8144 // is nullptr.
8145 if (EncounteredME) {
8146 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8147 unsigned FieldIndex = FD->getFieldIndex();
8148
8149 // Update info about the lowest and highest elements for this struct
8150 if (!PartialStruct.Base.isValid()) {
8151 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8152 if (IsFinalArraySection && OASE) {
8153 Address HB =
8154 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8155 .getAddress();
8156 PartialStruct.HighestElem = {FieldIndex, HB};
8157 } else {
8158 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8159 }
8160 PartialStruct.Base = BP;
8161 PartialStruct.LB = BP;
8162 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8163 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8164 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8165 if (IsFinalArraySection && OASE) {
8166 Address HB =
8167 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8168 .getAddress();
8169 PartialStruct.HighestElem = {FieldIndex, HB};
8170 } else {
8171 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8172 }
8173 }
8174 }
8175
8176 // Need to emit combined struct for array sections.
8177 if (IsFinalArraySection || IsNonContiguous)
8178 PartialStruct.IsArraySection = true;
8179
8180 // If we have a final array section, we are done with this expression.
8181 if (IsFinalArraySection)
8182 break;
8183
8184 // The pointer becomes the base for the next element.
8185 if (Next != CE)
8186 BP = IsMemberReference ? LowestElem : LB;
8187 if (!IsPartialMapped)
8188 IsExpressionFirstInfo = false;
8189 IsCaptureFirstInfo = false;
8190 FirstPointerInComplexData = false;
8191 IsPrevMemberReference = IsMemberReference;
8192 } else if (FirstPointerInComplexData) {
8193 QualType Ty = Components.rbegin()
8194 ->getAssociatedDeclaration()
8195 ->getType()
8196 .getNonReferenceType();
8197 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8198 FirstPointerInComplexData = false;
8199 }
8200 }
8201 // If we ran into the whole component, allocate the space for the whole
8202 // record.
8203 if (!EncounteredME)
8204 PartialStruct.HasCompleteRecord = true;
8205
8206 if (!IsNonContiguous)
8207 return;
8208
8209 const ASTContext &Context = CGF.getContext();
8210
8211 // For supporting stride in array section, we need to initialize the first
8212 // dimension size as 1, first offset as 0, and first count as 1
8213 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8214 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8215 MapValuesArrayTy CurStrides;
8216 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8217 uint64_t ElementTypeSize;
8218
8219 // Collect Size information for each dimension and get the element size as
8220 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8221 // should be [10, 10] and the first stride is 4 bytes.
8222 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8223 Components) {
8224 const Expr *AssocExpr = Component.getAssociatedExpression();
8225 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8226
8227 if (!OASE)
8228 continue;
8229
8230 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
8231 auto *CAT = Context.getAsConstantArrayType(Ty);
8232 auto *VAT = Context.getAsVariableArrayType(Ty);
8233
8234 // We need all the dimension sizes except for the last dimension.
8235 assert((VAT || CAT || &Component == &*Components.begin()) &&
8236 "Should be either ConstantArray or VariableArray if not the "
8237 "first Component");
8238
8239 // Get element size if CurStrides is empty.
8240 if (CurStrides.empty()) {
8241 const Type *ElementType = nullptr;
8242 if (CAT)
8243 ElementType = CAT->getElementType().getTypePtr();
8244 else if (VAT)
8245 ElementType = VAT->getElementType().getTypePtr();
8246 else
8247 assert(&Component == &*Components.begin() &&
8248 "Only expect pointer (non CAT or VAT) when this is the "
8249 "first Component");
8250 // If ElementType is null, then it means the base is a pointer
8251 // (neither CAT nor VAT) and we'll attempt to get ElementType again
8252 // for next iteration.
8253 if (ElementType) {
8254 // For the case that having pointer as base, we need to remove one
8255 // level of indirection.
8256 if (&Component != &*Components.begin())
8257 ElementType = ElementType->getPointeeOrArrayElementType();
8258 ElementTypeSize =
8259 Context.getTypeSizeInChars(ElementType).getQuantity();
8260 CurStrides.push_back(
8261 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8262 }
8263 }
8264 // Get the dimension value, except for the last dimension, since we
8265 // don't need it.
8266 if (DimSizes.size() < Components.size() - 1) {
8267 if (CAT)
8268 DimSizes.push_back(
8269 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
8270 else if (VAT)
8271 DimSizes.push_back(CGF.Builder.CreateIntCast(
8272 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8273 /*IsSigned=*/false));
8274 }
8275 }
8276
8277 // Skip the dummy dimension since we already have its information.
8278 auto *DI = DimSizes.begin() + 1;
8279 // Product of dimensions.
8280 llvm::Value *DimProd =
8281 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8282
8283 // Collect info for non-contiguous sections. Notice that offset, count, and
8284 // stride are only meaningful for an array section, so we insert a null for
8285 // anything other than an array section.
8286 // Also, the sizes of the offset, count, and stride lists are not the same
8287 // as those of pointers, base_pointers, sizes, or dims. Instead, they match
8288 // the number of non-contiguous declarations in the target update to/from
8289 // clause.
8290 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8291 Components) {
8292 const Expr *AssocExpr = Component.getAssociatedExpression();
8293
8294 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8295 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8296 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8297 /*isSigned=*/false);
8298 CurOffsets.push_back(Offset);
8299 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8300 CurStrides.push_back(CurStrides.back());
8301 continue;
8302 }
8303
8304 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8305
8306 if (!OASE)
8307 continue;
8308
8309 // Offset
8310 const Expr *OffsetExpr = OASE->getLowerBound();
8311 llvm::Value *Offset = nullptr;
8312 if (!OffsetExpr) {
8313 // If offset is absent, then we just set it to zero.
8314 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8315 } else {
8316 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8317 CGF.Int64Ty,
8318 /*isSigned=*/false);
8319 }
8320 CurOffsets.push_back(Offset);
8321
8322 // Count
8323 const Expr *CountExpr = OASE->getLength();
8324 llvm::Value *Count = nullptr;
8325 if (!CountExpr) {
8326 // In Clang, once a higher dimension is an array section, we construct all
8327 // the lower dimensions as array sections too; however, for a case like
8328 // arr[0:2][2], Clang constructs the inner dimension as an array section
8329 // even though it is not in array-section form according to the spec.
8330 if (!OASE->getColonLocFirst().isValid() &&
8331 !OASE->getColonLocSecond().isValid()) {
8332 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8333 } else {
8334 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8335 // When the length is absent it defaults to ⌈(size −
8336 // lower-bound)/stride⌉, where size is the size of the array
8337 // dimension.
8338 const Expr *StrideExpr = OASE->getStride();
8339 llvm::Value *Stride =
8340 StrideExpr
8341 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8342 CGF.Int64Ty, /*isSigned=*/false)
8343 : nullptr;
8344 if (Stride)
8345 Count = CGF.Builder.CreateUDiv(
8346 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8347 else
8348 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8349 }
8350 } else {
8351 Count = CGF.EmitScalarExpr(CountExpr);
8352 }
8353 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8354 CurCounts.push_back(Count);
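// Worked example of the default length (hypothetical values): for a
// dimension of size 10, lower-bound 2, and stride 2, the count defaults
// to (10 - 2) / 2 = 4 elements.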
8355
8356 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8357 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8358 // Offset Count Stride
8359 // D0 0 1 4 (int) <- dummy dimension
8360 // D1 0 2 8 (2 * (1) * 4)
8361 // D2 1 2 20 (1 * (1 * 5) * 4)
8362 // D3 0 2 200 (2 * (1 * 5 * 5) * 4)
8363 const Expr *StrideExpr = OASE->getStride();
8364 llvm::Value *Stride =
8365 StrideExpr
8366 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8367 CGF.Int64Ty, /*isSigned=*/false)
8368 : nullptr;
8369 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8370 if (Stride)
8371 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8372 else
8373 CurStrides.push_back(DimProd);
8374 if (DI != DimSizes.end())
8375 ++DI;
8376 }
8377
8378 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8379 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8380 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8381 }
8382
8383 /// Return the adjusted map modifiers if the declaration a capture refers to
8384 /// appears in a first-private clause. This is expected to be used only with
8385 /// directives that start with 'target'.
8386 OpenMPOffloadMappingFlags
8387 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8388 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8389
8390 // A first private variable captured by reference will use only the
8391 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8392 // declaration is known as first-private in this handler.
8393 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8394 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8395 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8396 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8397 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8398 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8399 }
8400 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8401 if (I != LambdasMap.end())
8402 // For map(to: lambda): use the user-specified map type.
8403 return getMapTypeBits(
8404 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8405 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8406 /*AddPtrFlag=*/false,
8407 /*AddIsTargetParamFlag=*/false,
8408 /*isNonContiguous=*/false);
8409 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8410 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8411 }
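// Illustrative sketch (hypothetical captures): given
//   int x; int *p;
//   #pragma omp target firstprivate(x, p)
// the logic above yields OMP_MAP_PRIVATE | OMP_MAP_TO for x and
// OMP_MAP_TO | OMP_MAP_PTR_AND_OBJ for the pointer p.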
8412
8413 void getPlainLayout(const CXXRecordDecl *RD,
8414 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8415 bool AsBase) const {
8416 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8417
8418 llvm::StructType *St =
8419 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8420
8421 unsigned NumElements = St->getNumElements();
8422 llvm::SmallVector<
8423 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8424 RecordLayout(NumElements);
8425
8426 // Fill bases.
8427 for (const auto &I : RD->bases()) {
8428 if (I.isVirtual())
8429 continue;
8430
8431 QualType BaseTy = I.getType();
8432 const auto *Base = BaseTy->getAsCXXRecordDecl();
8433 // Ignore empty bases.
8434 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8435 CGF.getContext()
8436 .getASTRecordLayout(Base)
8437 .getNonVirtualSize()
8438 .isZero())
8439 continue;
8440
8441 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8442 RecordLayout[FieldIndex] = Base;
8443 }
8444 // Fill in virtual bases.
8445 for (const auto &I : RD->vbases()) {
8446 QualType BaseTy = I.getType();
8447 // Ignore empty bases.
8448 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8449 continue;
8450
8451 const auto *Base = BaseTy->getAsCXXRecordDecl();
8452 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8453 if (RecordLayout[FieldIndex])
8454 continue;
8455 RecordLayout[FieldIndex] = Base;
8456 }
8457 // Fill in all the fields.
8458 assert(!RD->isUnion() && "Unexpected union.");
8459 for (const auto *Field : RD->fields()) {
8460 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8461 // will fill in later.)
8462 if (!Field->isBitField() &&
8463 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8464 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8465 RecordLayout[FieldIndex] = Field;
8466 }
8467 }
8468 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8469 &Data : RecordLayout) {
8470 if (Data.isNull())
8471 continue;
8472 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8473 getPlainLayout(Base, Layout, /*AsBase=*/true);
8474 else
8475 Layout.push_back(cast<const FieldDecl *>(Data));
8476 }
8477 }
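// Illustrative sketch (hypothetical hierarchy): for
//   struct A { int x; }; struct B : A { int y; };
// getPlainLayout on B recurses into the non-virtual base first, producing
// the field order [A::x, B::y], with empty bases and bitfields skipped.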
8478
8479 /// Returns the address corresponding to \p PointerExpr.
8480 static Address getAttachPtrAddr(const Expr *PointerExpr,
8481 CodeGenFunction &CGF) {
8482 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8483 Address AttachPtrAddr = Address::invalid();
8484
8485 if (auto *DRE = dyn_cast<DeclRefExpr>(PointerExpr)) {
8486 // If the pointer is a variable, we can use its address directly.
8487 AttachPtrAddr = CGF.EmitLValue(DRE).getAddress();
8488 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(PointerExpr)) {
8489 AttachPtrAddr =
8490 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/true).getAddress();
8491 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(PointerExpr)) {
8492 AttachPtrAddr = CGF.EmitLValue(ASE).getAddress();
8493 } else if (auto *ME = dyn_cast<MemberExpr>(PointerExpr)) {
8494 AttachPtrAddr = CGF.EmitMemberExpr(ME).getAddress();
8495 } else if (auto *UO = dyn_cast<UnaryOperator>(PointerExpr)) {
8496 assert(UO->getOpcode() == UO_Deref &&
8497 "Unexpected unary-operator on attach-ptr-expr");
8498 AttachPtrAddr = CGF.EmitLValue(UO).getAddress();
8499 }
8500 assert(AttachPtrAddr.isValid() &&
8501 "Failed to get address for attach pointer expression");
8502 return AttachPtrAddr;
8503 }
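// Illustrative sketch (hypothetical expressions): the supported attach-ptr
// forms are a variable `p` (DeclRefExpr), an element `a[i]`
// (ArraySubscriptExpr or the lower bound of an ArraySectionExpr), a member
// `s.p` (MemberExpr), and a dereference `*pp` (UnaryOperator); e.g. for
// map(p[0:n]) the attach pointer is `p` and this returns the address of
// `p` itself.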
8504
8505 /// Get the address of the attach pointer, and a load from it, to get the
8506 /// pointee base address.
8507 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8508 /// contains invalid addresses if \p AttachPtrExpr is null.
8509 static std::pair<Address, Address>
8510 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8511 CodeGenFunction &CGF) {
8512
8513 if (!AttachPtrExpr)
8514 return {Address::invalid(), Address::invalid()};
8515
8516 Address AttachPtrAddr = getAttachPtrAddr(AttachPtrExpr, CGF);
8517 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8518
8519 QualType AttachPtrType =
8520 AttachPtrExpr->getType()
8521 .getNonReferenceType();
8522
8523 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8524 AttachPtrAddr, AttachPtrType->castAs<PointerType>());
8525 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8526
8527 return {AttachPtrAddr, AttachPteeBaseAddr};
8528 }
8529
8530 /// Returns whether an attach entry should be emitted for a map on
8531 /// \p MapBaseDecl on the directive \p CurDir.
8532 static bool
8533 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8534 CodeGenFunction &CGF,
8535 llvm::PointerUnion<const OMPExecutableDirective *,
8536 const OMPDeclareMapperDecl *>
8537 CurDir) {
8538 if (!PointerExpr)
8539 return false;
8540
8541 // Pointer attachment is needed at map-entering time or for declare
8542 // mappers.
8543 return isa<const OMPDeclareMapperDecl *>(CurDir) ||
8544 isOpenMPTargetMapEnteringDirective(
8545 cast<const OMPExecutableDirective *>(CurDir)
8546 ->getDirectiveKind());
8547 }
8548
8549 /// Computes the attach-ptr expr for \p Components, and updates various maps
8550 /// with the information.
8551 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8552 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8553 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8554 /// AttachPtrExprMap.
8555 void collectAttachPtrExprInfo(
8556 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
8557 llvm::PointerUnion<const OMPExecutableDirective *,
8558 const OMPDeclareMapperDecl *>
8559 CurDir) {
8560
8561 OpenMPDirectiveKind CurDirectiveID =
8562 isa<const OMPDeclareMapperDecl *>(CurDir)
8563 ? OMPD_declare_mapper
8564 : cast<const OMPExecutableDirective *>(CurDir)->getDirectiveKind();
8565
8566 const auto &[AttachPtrExpr, Depth] =
8567 OMPClauseMappableExprCommon::findAttachPtrExpr(Components,
8568 CurDirectiveID);
8569
8570 AttachPtrComputationOrderMap.try_emplace(
8571 AttachPtrExpr, AttachPtrComputationOrderMap.size());
8572 AttachPtrComponentDepthMap.try_emplace(AttachPtrExpr, Depth);
8573 AttachPtrExprMap.try_emplace(Components, AttachPtrExpr);
8574 }
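// Illustrative sketch (hypothetical map): for map(tofrom: s.p[0:n]), the
// attach-ptr expr computed above would be `s.p`; it is recorded together
// with its component depth and the order in which it was first seen so
// later passes can process attach entries consistently.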
8575
8576 /// Generate all the base pointers, section pointers, sizes, map types, and
8577 /// mappers for the extracted mappable expressions (all included in \a
8578 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8579 /// pair of the relevant declaration and index where it occurs is appended to
8580 /// the device pointers info array.
8581 void generateAllInfoForClauses(
8582 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8583 llvm::OpenMPIRBuilder &OMPBuilder,
8584 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8585 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8586 // We have to process the component lists that relate with the same
8587 // declaration in a single chunk so that we can generate the map flags
8588 // correctly. Therefore, we organize all lists in a map.
8589 enum MapKind { Present, Allocs, Other, Total };
8590 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8591 SmallVector<SmallVector<MapInfo, 8>, 4>>
8592 Info;
8593
8594 // Helper function to fill the information map for the different supported
8595 // clauses.
8596 auto &&InfoGen =
8597 [&Info, &SkipVarSet](
8598 const ValueDecl *D, MapKind Kind,
8599 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8600 OpenMPMapClauseKind MapType,
8601 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8602 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8603 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8604 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8605 if (SkipVarSet.contains(D))
8606 return;
8607 auto It = Info.try_emplace(D, Total).first;
8608 It->second[Kind].emplace_back(
8609 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8610 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8611 };
8612
8613 for (const auto *Cl : Clauses) {
8614 const auto *C = dyn_cast<OMPMapClause>(Cl);
8615 if (!C)
8616 continue;
8617 MapKind Kind = Other;
8618 if (llvm::is_contained(C->getMapTypeModifiers(),
8619 OMPC_MAP_MODIFIER_present))
8620 Kind = Present;
8621 else if (C->getMapType() == OMPC_MAP_alloc)
8622 Kind = Allocs;
8623 const auto *EI = C->getVarRefs().begin();
8624 for (const auto L : C->component_lists()) {
8625 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8626 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8627 C->getMapTypeModifiers(), {},
8628 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8629 E);
8630 ++EI;
8631 }
8632 }
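// Illustrative sketch (hypothetical clauses): map(present, to: x) lands in
// the Present bucket, map(alloc: y) in Allocs, and map(tofrom: z) in
// Other, so entries for one declaration are emitted in that bucket order.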
8633 for (const auto *Cl : Clauses) {
8634 const auto *C = dyn_cast<OMPToClause>(Cl);
8635 if (!C)
8636 continue;
8637 MapKind Kind = Other;
8638 if (llvm::is_contained(C->getMotionModifiers(),
8639 OMPC_MOTION_MODIFIER_present))
8640 Kind = Present;
8641 if (llvm::is_contained(C->getMotionModifiers(),
8642 OMPC_MOTION_MODIFIER_iterator)) {
8643 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8644 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8645 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8646 CGF.EmitVarDecl(*VD);
8647 }
8648 }
8649
8650 const auto *EI = C->getVarRefs().begin();
8651 for (const auto L : C->component_lists()) {
8652 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8653 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8654 C->isImplicit(), std::get<2>(L), *EI);
8655 ++EI;
8656 }
8657 }
8658 for (const auto *Cl : Clauses) {
8659 const auto *C = dyn_cast<OMPFromClause>(Cl);
8660 if (!C)
8661 continue;
8662 MapKind Kind = Other;
8663 if (llvm::is_contained(C->getMotionModifiers(),
8664 OMPC_MOTION_MODIFIER_present))
8665 Kind = Present;
8666 if (llvm::is_contained(C->getMotionModifiers(),
8667 OMPC_MOTION_MODIFIER_iterator)) {
8668 if (auto *IteratorExpr = dyn_cast<OMPIteratorExpr>(
8669 C->getIteratorModifier()->IgnoreParenImpCasts())) {
8670 const auto *VD = cast<VarDecl>(IteratorExpr->getIteratorDecl(0));
8671 CGF.EmitVarDecl(*VD);
8672 }
8673 }
8674
8675 const auto *EI = C->getVarRefs().begin();
8676 for (const auto L : C->component_lists()) {
8677 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8678 C->getMotionModifiers(),
8679 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8680 *EI);
8681 ++EI;
8682 }
8683 }
8684
8685 // Look at the use_device_ptr and use_device_addr clauses information and
8686 // mark the existing map entries as such. If there is no map information for
8687 // an entry in the use_device_ptr and use_device_addr list, we create one
8688 // with map type 'alloc' and zero size section. It is the user's fault if that
8689 // was not mapped before. If there is no map information and the pointer is
8690 // a struct member, then we defer the emission of that entry until the whole
8691 // struct has been processed.
8692 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8693 SmallVector<DeferredDevicePtrEntryTy, 4>>
8694 DeferredInfo;
8695 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8696
8697 auto &&UseDeviceDataCombinedInfoGen =
8698 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8699 CodeGenFunction &CGF, bool IsDevAddr) {
8700 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8701 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8702 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8703 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8704 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8705 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8706 UseDeviceDataCombinedInfo.Sizes.push_back(
8707 llvm::Constant::getNullValue(CGF.Int64Ty));
8708 UseDeviceDataCombinedInfo.Types.push_back(
8709 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8710 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8711 };
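// Illustrative sketch (hypothetical clause): for use_device_ptr(p), the
// lambda above records a zero-sized OMP_MAP_RETURN_PARAM entry for p, so
// the runtime hands back the corresponding device pointer instead of
// mapping any storage.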
8712
8713 auto &&MapInfoGen =
8714 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8715 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8716 OMPClauseMappableExprCommon::MappableExprComponentListRef
8717 Components,
8718 bool IsImplicit, bool IsDevAddr) {
8719 // We didn't find any match in our map information - generate a zero
8720 // size array section - if the pointer is a struct member we defer
8721 // this action until the whole struct has been processed.
8722 if (isa<MemberExpr>(IE)) {
8723 // Insert the pointer into Info to be processed by
8724 // generateInfoForComponentList. Because it is a member pointer
8725 // without a pointee, no entry will be generated for it, therefore
8726 // we need to generate one after the whole struct has been
8727 // processed. Nonetheless, generateInfoForComponentList must be
8728 // called to take the pointer into account for the calculation of
8729 // the range of the partial struct.
8730 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
8731 /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
8732 IsDevAddr);
8733 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8734 } else {
8735 llvm::Value *Ptr;
8736 if (IsDevAddr) {
8737 if (IE->isGLValue())
8738 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8739 else
8740 Ptr = CGF.EmitScalarExpr(IE);
8741 } else {
8742 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8743 }
8744 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
8745 }
8746 };
8747
8748 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8749 const Expr *IE, bool IsDevAddr) -> bool {
8750 // We potentially have map information for this declaration already.
8751 // Look for the first set of components that refer to it. If found,
8752 // return true.
8753 // If the first component is a member expression, we have to look into
8754 // 'this', which maps to null in the map of map information. Otherwise
8755 // look directly for the information.
8756 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8757 if (It != Info.end()) {
8758 bool Found = false;
8759 for (auto &Data : It->second) {
8760 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8761 return MI.Components.back().getAssociatedDeclaration() == VD;
8762 });
8763 // If we found a map entry, signal that the pointer has to be
8764 // returned and move on to the next declaration. Exclude cases where
8765 // the base pointer is mapped as array subscript, array section or
8766 // array shaping. The base address is passed as a pointer to base in
8767 // this case and cannot be used as a base for use_device_ptr list
8768 // item.
8769 if (CI != Data.end()) {
8770 if (IsDevAddr) {
8771 CI->ForDeviceAddr = IsDevAddr;
8772 CI->ReturnDevicePointer = true;
8773 Found = true;
8774 break;
8775 } else {
8776 auto PrevCI = std::next(CI->Components.rbegin());
8777 const auto *VarD = dyn_cast<VarDecl>(VD);
8778 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8779 isa<MemberExpr>(IE) ||
8780 !VD->getType().getNonReferenceType()->isPointerType() ||
8781 PrevCI == CI->Components.rend() ||
8782 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8783 VarD->hasLocalStorage()) {
8784 CI->ForDeviceAddr = IsDevAddr;
8785 CI->ReturnDevicePointer = true;
8786 Found = true;
8787 break;
8788 }
8789 }
8790 }
8791 }
8792 return Found;
8793 }
8794 return false;
8795 };
8796
8797 // Look at the use_device_ptr clause information and mark the existing map
8798 // entries as such. If there is no map information for an entry in the
8799 // use_device_ptr list, we create one with map type 'alloc' and zero size
8800 // section. It is the user's fault if that was not mapped before. If there is
8801 // no map information and the pointer is a struct member, then we defer the
8802 // emission of that entry until the whole struct has been processed.
8803 for (const auto *Cl : Clauses) {
8804 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8805 if (!C)
8806 continue;
8807 for (const auto L : C->component_lists()) {
8808 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8809 std::get<1>(L);
8810 assert(!Components.empty() &&
8811 "Not expecting empty list of components!");
8812 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8813 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8814 const Expr *IE = Components.back().getAssociatedExpression();
8815 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8816 continue;
8817 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8818 /*IsDevAddr=*/false);
8819 }
8820 }
8821
8822 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8823 for (const auto *Cl : Clauses) {
8824 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8825 if (!C)
8826 continue;
8827 for (const auto L : C->component_lists()) {
8828 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8829 std::get<1>(L);
8830 assert(!std::get<1>(L).empty() &&
8831 "Not expecting empty list of components!");
8832 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8833 if (!Processed.insert(VD).second)
8834 continue;
8835 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8836 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8837 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8838 continue;
8839 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8840 /*IsDevAddr=*/true);
8841 }
8842 }
8843
8844 for (const auto &Data : Info) {
8845 StructRangeInfoTy PartialStruct;
8846 // Current struct information:
8847 MapCombinedInfoTy CurInfo;
8848 // Current struct base information:
8849 MapCombinedInfoTy StructBaseCurInfo;
8850 const Decl *D = Data.first;
8851 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8852 bool HasMapBasePtr = false;
8853 bool HasMapArraySec = false;
8854 if (VD && VD->getType()->isAnyPointerType()) {
8855 for (const auto &M : Data.second) {
8856 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8857 return isa_and_present<DeclRefExpr>(L.VarRef);
8858 });
8859 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8860 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8861 L.VarRef);
8862 });
8863 if (HasMapBasePtr && HasMapArraySec)
8864 break;
8865 }
8866 }
8867 for (const auto &M : Data.second) {
8868 for (const MapInfo &L : M) {
8869 assert(!L.Components.empty() &&
8870 "Not expecting declaration with no component lists.");
8871
8872 // Remember the current base pointer index.
8873 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8874 unsigned StructBasePointersIdx =
8875 StructBaseCurInfo.BasePointers.size();
8876 CurInfo.NonContigInfo.IsNonContiguous =
8877 L.Components.back().isNonContiguous();
8878 generateInfoForComponentList(
8879 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8880 CurInfo, StructBaseCurInfo, PartialStruct,
8881 /*IsFirstComponentList=*/false, L.IsImplicit,
8882 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8883 L.VarRef, /*OverlappedElements*/ {},
8884 HasMapBasePtr && HasMapArraySec);
8885
8886 // If this entry relates to a device pointer, set the relevant
8887 // declaration and add the 'return pointer' flag.
8888 if (L.ReturnDevicePointer) {
8889 // Check whether a value was added to either CurInfo or
8890 // StructBaseCurInfo and error if no value was added to either of
8891 // them:
8892 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8893 StructBasePointersIdx <
8894 StructBaseCurInfo.BasePointers.size()) &&
8895 "Unexpected number of mapped base pointers.");
8896
8897 // Choose a base pointer index which is always valid:
8898 const ValueDecl *RelevantVD =
8899 L.Components.back().getAssociatedDeclaration();
8900 assert(RelevantVD &&
8901 "No relevant declaration related to the device pointer??");
8902
8903 // If StructBaseCurInfo has been updated this iteration then work on
8904 // the first new entry added to it i.e. make sure that when multiple
8905 // values are added to any of the lists, the first value added is
8906 // being modified by the assignments below (not the last value
8907 // added).
8908 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8909 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8910 RelevantVD;
8911 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8912 L.ForDeviceAddr ? DeviceInfoTy::Address
8913 : DeviceInfoTy::Pointer;
8914 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8915 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8916 } else {
8917 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8918 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8919 L.ForDeviceAddr ? DeviceInfoTy::Address
8920 : DeviceInfoTy::Pointer;
8921 CurInfo.Types[CurrentBasePointersIdx] |=
8922 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8923 }
8924 }
8925 }
8926 }
8927
8928 // Append any pending zero-length pointers which are struct members and
8929 // used with use_device_ptr or use_device_addr.
8930 auto CI = DeferredInfo.find(Data.first);
8931 if (CI != DeferredInfo.end()) {
8932 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8933 llvm::Value *BasePtr;
8934 llvm::Value *Ptr;
8935 if (L.ForDeviceAddr) {
8936 if (L.IE->isGLValue())
8937 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8938 else
8939 Ptr = this->CGF.EmitScalarExpr(L.IE);
8940 BasePtr = Ptr;
8941 // Entry is RETURN_PARAM. Also, set the placeholder value
8942 // MEMBER_OF=FFFF so that the entry is later updated with the
8943 // correct value of MEMBER_OF.
8944 CurInfo.Types.push_back(
8945 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8946 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8947 } else {
8948 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8949 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8950 L.IE->getExprLoc());
8951 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8952 // placeholder value MEMBER_OF=FFFF so that the entry is later
8953 // updated with the correct value of MEMBER_OF.
8954 CurInfo.Types.push_back(
8955 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8956 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8957 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8958 }
8959 CurInfo.Exprs.push_back(L.VD);
8960 CurInfo.BasePointers.emplace_back(BasePtr);
8961 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8962 CurInfo.DevicePointers.emplace_back(
8963 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8964 CurInfo.Pointers.push_back(Ptr);
8965 CurInfo.Sizes.push_back(
8966 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8967 CurInfo.Mappers.push_back(nullptr);
8968 }
8969 }
8970
8971 // Unify entries in one list making sure the struct mapping precedes the
8972 // individual fields:
8973 MapCombinedInfoTy UnionCurInfo;
8974 UnionCurInfo.append(StructBaseCurInfo);
8975 UnionCurInfo.append(CurInfo);
8976
8977 // If there is an entry in PartialStruct it means we have a struct with
8978 // individual members mapped. Emit an extra combined entry.
8979 if (PartialStruct.Base.isValid()) {
8980 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8981 // Emit a combined entry:
8982 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8983 /*IsMapThis*/ !VD, OMPBuilder, VD);
8984 }
8985
8986 // We need to append the results of this capture to what we already have.
8987 CombinedInfo.append(UnionCurInfo);
8988 }
8989 // Append data for use_device_ptr clauses.
8990 CombinedInfo.append(UseDeviceDataCombinedInfo);
8991 }
8992
8993public:
8994 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8995 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
8996 // Extract firstprivate clause information.
8997 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8998 for (const auto *D : C->varlist())
8999 FirstPrivateDecls.try_emplace(
9000 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
9001 // Extract implicit firstprivates from uses_allocators clauses.
9002 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
9003 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
9004 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
9005 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
9006 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
9007 /*Implicit=*/true);
9008 else if (const auto *VD = dyn_cast<VarDecl>(
9009 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
9010 ->getDecl()))
9011 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
9012 }
9013 }
9014 // Extract defaultmap clause information.
9015 for (const auto *C : Dir.getClausesOfKind<OMPDefaultmapClause>())
9016 if (C->getDefaultmapModifier() == OMPC_DEFAULTMAP_MODIFIER_firstprivate)
9017 DefaultmapFirstprivateKinds.insert(C->getDefaultmapKind());
9018 // Extract device pointer clause information.
9019 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
9020 for (auto L : C->component_lists())
9021 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
9022 // Extract device addr clause information.
9023 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
9024 for (auto L : C->component_lists())
9025 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
9026 // Extract map information.
9027 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
9028 if (C->getMapType() != OMPC_MAP_to)
9029 continue;
9030 for (auto L : C->component_lists()) {
9031 const ValueDecl *VD = std::get<0>(L);
9032 const auto *RD = VD ? VD->getType()
9033 .getCanonicalType()
9034 .getNonReferenceType()
9035 ->getAsCXXRecordDecl()
9036 : nullptr;
9037 if (RD && RD->isLambda())
9038 LambdasMap.try_emplace(std::get<0>(L), C);
9039 }
9040 }
9041 }
9042
9043 /// Constructor for the declare mapper directive.
9044 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
9045 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9046
9047 /// Generate code for the combined entry if we have a partially mapped struct
9048 /// and take care of the mapping flags of the arguments corresponding to
9049 /// individual struct members.
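/// For instance (illustrative, not normative): 'map(tofrom: s.x, s.y)'
/// produces one combined entry spanning the storage from 's.x' through
/// 's.y', plus MEMBER_OF-tagged entries for the two fields themselves.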
9050 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
9051 MapFlagsArrayTy &CurTypes,
9052 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
9053 llvm::OpenMPIRBuilder &OMPBuilder,
9054 const ValueDecl *VD = nullptr,
9055 unsigned OffsetForMemberOfFlag = 0,
9056 bool NotTargetParams = true) const {
9057 if (CurTypes.size() == 1 &&
9058 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
9059 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
9060 !PartialStruct.IsArraySection)
9061 return;
9062 Address LBAddr = PartialStruct.LowestElem.second;
9063 Address HBAddr = PartialStruct.HighestElem.second;
9064 if (PartialStruct.HasCompleteRecord) {
9065 LBAddr = PartialStruct.LB;
9066 HBAddr = PartialStruct.LB;
9067 }
9068 CombinedInfo.Exprs.push_back(VD);
9069 // Base is the base of the struct
9070 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9071 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9072 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9073 // Pointer is the address of the lowest element
9074 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
9075 const CXXMethodDecl *MD =
9076 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
9077 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
9078 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
9079 // There should not be a mapper for a combined entry.
9080 if (HasBaseClass) {
9081 // OpenMP 5.2 148:21:
9082 // If the target construct is within a class non-static member function,
9083 // and a variable is an accessible data member of the object for which the
9084 // non-static data member function is invoked, the variable is treated as
9085 // if the this[:1] expression had appeared in a map clause with a map-type
9086 // of tofrom.
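// Illustrative case (not from the source): for a member function of
//   struct S : Base { int x; void f(); };
// that maps 'x' inside a target region, the base class forces mapping the
// complete object, as if 'this[:1]' had been written, instead of only the
// lowest-to-highest range of the mapped members.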
9087 // Emit this[:1]
9088 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9089 QualType Ty = MD->getFunctionObjectParameterType();
9090 llvm::Value *Size =
9091 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
9092 /*isSigned=*/true);
9093 CombinedInfo.Sizes.push_back(Size);
9094 } else {
9095 CombinedInfo.Pointers.push_back(LB);
9096 // Size is (addr of {highest+1} element) - (addr of lowest element)
9097 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
9098 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
9099 HBAddr.getElementType(), HB, /*Idx0=*/1);
9100 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
9101 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
9102 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
9103 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9104 /*isSigned=*/false);
9105 CombinedInfo.Sizes.push_back(Size);
9106 }
9107 CombinedInfo.Mappers.push_back(nullptr);
9108 // The map type is always TARGET_PARAM when generating info for captures.
9109 CombinedInfo.Types.push_back(
9110 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
9111 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
9112 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
9113 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9114 // If any element has the present modifier, then make sure the runtime
9115 // doesn't attempt to allocate the struct.
9116 if (CurTypes.end() !=
9117 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9118 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9119 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9120 }))
9121 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
9122 // Remove TARGET_PARAM flag from the first element
9123 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9124 // If any element has the ompx_hold modifier, then make sure the runtime
9125 // uses the hold reference count for the struct as a whole so that it won't
9126 // be unmapped by an extra dynamic reference count decrement. Add it to all
9127 // elements as well so the runtime knows which reference count to check
9128 // when determining whether it's time for device-to-host transfers of
9129 // individual elements.
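// E.g. (illustrative): for 'map(ompx_hold, tofrom: s.a) map(tofrom: s.b)',
// the hold reference count is applied both to the combined entry for 's'
// and to the entries for the two members.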
9130 if (CurTypes.end() !=
9131 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9132 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9133 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
9134 })) {
9135 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9136 for (auto &M : CurTypes)
9137 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9138 }
9139
9140 // All other current entries will be MEMBER_OF the combined entry
9141 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9142 // 0xFFFF in the MEMBER_OF field).
9143 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
9144 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
9145 for (auto &M : CurTypes)
9146 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
9147 }
9148
9149 /// Generate all the base pointers, section pointers, sizes, map types, and
9150 /// mappers for the extracted mappable expressions (all included in \a
9151 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9152 /// pair of the relevant declaration and index where it occurs is appended to
9153 /// the device pointers info array.
9154 void generateAllInfo(
9155 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9156 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9157 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9158 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9159 "Expect an executable directive");
9160 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9161 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9162 SkipVarSet);
9163 }
9164
9165 /// Generate all the base pointers, section pointers, sizes, map types, and
9166 /// mappers for the extracted map clauses of user-defined mapper (all included
9167 /// in \a CombinedInfo).
9168 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9169 llvm::OpenMPIRBuilder &OMPBuilder) const {
9170 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9171 "Expect a declare mapper directive");
9172 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
9173 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
9174 OMPBuilder);
9175 }
9176
9177 /// Emit capture info for lambdas for variables captured by reference.
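/// For example (illustrative only), for
/// \code
/// int x = 0;
/// auto l = [&x]() { return x; };
/// #pragma omp target map(to: l)
/// l();
/// \endcode
/// the capture field for 'x' inside 'l' is emitted as a PTR_AND_OBJ entry,
/// so the device copy of the lambda points at the device copy of 'x'.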
9178 void generateInfoForLambdaCaptures(
9179 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9180 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9181 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9182 const auto *RD = VDType->getAsCXXRecordDecl();
9183 if (!RD || !RD->isLambda())
9184 return;
9185 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9186 CGF.getContext().getDeclAlign(VD));
9187 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9188 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9189 FieldDecl *ThisCapture = nullptr;
9190 RD->getCaptureFields(Captures, ThisCapture);
9191 if (ThisCapture) {
9192 LValue ThisLVal =
9193 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9194 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9195 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9196 VDLVal.getPointer(CGF));
9197 CombinedInfo.Exprs.push_back(VD);
9198 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9199 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9200 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9201 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9202 CombinedInfo.Sizes.push_back(
9203 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9204 CGF.Int64Ty, /*isSigned=*/true));
9205 CombinedInfo.Types.push_back(
9206 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9207 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9208 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9209 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9210 CombinedInfo.Mappers.push_back(nullptr);
9211 }
9212 for (const LambdaCapture &LC : RD->captures()) {
9213 if (!LC.capturesVariable())
9214 continue;
9215 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
9216 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9217 continue;
9218 auto It = Captures.find(VD);
9219 assert(It != Captures.end() && "Found lambda capture without field.");
9220 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9221 if (LC.getCaptureKind() == LCK_ByRef) {
9222 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9223 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9224 VDLVal.getPointer(CGF));
9225 CombinedInfo.Exprs.push_back(VD);
9226 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9227 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9228 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9229 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9230 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9231 CGF.getTypeSize(
9232 VD->getType().getCanonicalType().getNonReferenceType()),
9233 CGF.Int64Ty, /*isSigned=*/true));
9234 } else {
9235 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9236 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9237 VDLVal.getPointer(CGF));
9238 CombinedInfo.Exprs.push_back(VD);
9239 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9240 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9241 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9242 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9243 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9244 }
9245 CombinedInfo.Types.push_back(
9246 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9247 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9248 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9249 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9250 CombinedInfo.Mappers.push_back(nullptr);
9251 }
9252 }
9253
9254 /// Set correct indices for lambdas captures.
9255 void adjustMemberOfForLambdaCaptures(
9256 llvm::OpenMPIRBuilder &OMPBuilder,
9257 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9258 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9259 MapFlagsArrayTy &Types) const {
9260 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9261 // Set correct member_of idx for all implicit lambda captures.
9262 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9263 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9264 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9265 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9266 continue;
9267 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
9268 assert(BasePtr && "Unable to find base lambda address.");
9269 int TgtIdx = -1;
9270 for (unsigned J = I; J > 0; --J) {
9271 unsigned Idx = J - 1;
9272 if (Pointers[Idx] != BasePtr)
9273 continue;
9274 TgtIdx = Idx;
9275 break;
9276 }
9277 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9278 // All other current entries will be MEMBER_OF the combined entry
9279 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9280 // 0xFFFF in the MEMBER_OF field).
9281 OpenMPOffloadMappingFlags MemberOfFlag =
9282 OMPBuilder.getMemberOfFlag(TgtIdx);
9283 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9284 }
9285 }
9286
9287 /// For a capture that has an associated clause, generate the base pointers,
9288 /// section pointers, sizes, map types, and mappers (all included in
9289 /// \a CurCaptureVarInfo).
9290 void generateInfoForCaptureFromClauseInfo(
9291 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
9292 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9293 unsigned OffsetForMemberOfFlag) const {
9294 assert(!Cap->capturesVariableArrayType() &&
9295 "Not expecting to generate map info for a variable array type!");
9296
9297 // We need to know when we are generating information for the first component.
9298 const ValueDecl *VD = Cap->capturesThis()
9299 ? nullptr
9300 : Cap->getCapturedVar()->getCanonicalDecl();
9301
9302 // For map(to: lambda): skip it here; it is processed in
9303 // generateDefaultMapInfo.
9304 if (LambdasMap.count(VD))
9305 return;
9306
9307 // If this declaration appears in an is_device_ptr clause, we just have to
9308 // pass the pointer by value. If it is a reference to a declaration, we just
9309 // pass its value.
9310 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
9311 CurCaptureVarInfo.Exprs.push_back(VD);
9312 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
9313 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
9314 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
9315 CurCaptureVarInfo.Pointers.push_back(Arg);
9316 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9317 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9318 /*isSigned=*/true));
9319 CurCaptureVarInfo.Types.push_back(
9320 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9321 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9322 CurCaptureVarInfo.Mappers.push_back(nullptr);
9323 return;
9324 }
9325
9326 MapDataArrayTy DeclComponentLists;
9327 // For member fields list in is_device_ptr, store it in
9328 // DeclComponentLists for generating components info.
9329 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9330 auto It = DevPointersMap.find(VD);
9331 if (It != DevPointersMap.end())
9332 for (const auto &MCL : It->second)
9333 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9334 /*IsImplicit = */ true, nullptr,
9335 nullptr);
9336 auto I = HasDevAddrsMap.find(VD);
9337 if (I != HasDevAddrsMap.end())
9338 for (const auto &MCL : I->second)
9339 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9340 /*IsImplicit = */ true, nullptr,
9341 nullptr);
9342 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9343 "Expect an executable directive");
9344 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9345 bool HasMapBasePtr = false;
9346 bool HasMapArraySec = false;
9347 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9348 const auto *EI = C->getVarRefs().begin();
9349 for (const auto L : C->decl_component_lists(VD)) {
9350 const ValueDecl *VDecl, *Mapper;
9351 // The expression is not valid if the mapping is implicit.
9352 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9353 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9354 std::tie(VDecl, Components, Mapper) = L;
9355 assert(VDecl == VD && "We got information for the wrong declaration??");
9356 assert(!Components.empty() &&
9357 "Not expecting declaration with no component lists.");
9358 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
9359 HasMapBasePtr = true;
9360 if (VD && E && VD->getType()->isAnyPointerType() &&
9361 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
9362 HasMapArraySec = true;
9363 DeclComponentLists.emplace_back(Components, C->getMapType(),
9364 C->getMapTypeModifiers(),
9365 C->isImplicit(), Mapper, E);
9366 ++EI;
9367 }
9368 }
9369 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9370 const MapData &RHS) {
9371 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9372 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9373 bool HasPresent =
9374 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9375 bool HasAllocs = MapType == OMPC_MAP_alloc;
9376 MapModifiers = std::get<2>(RHS);
9377 MapType = std::get<1>(LHS);
9378 bool HasPresentR =
9379 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9380 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9381 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9382 });
9383
9384 auto GenerateInfoForComponentLists =
9385 [&](ArrayRef<MapData> DeclComponentLists,
9386 bool IsEligibleForTargetParamFlag) {
9387 MapCombinedInfoTy CurInfoForComponentLists;
9388 StructRangeInfoTy PartialStruct;
9389
9390 if (DeclComponentLists.empty())
9391 return;
9392
9393 generateInfoForCaptureFromComponentLists(
9394 VD, DeclComponentLists, CurInfoForComponentLists, PartialStruct,
9395 IsEligibleForTargetParamFlag,
9396 /*AreBothBasePtrAndPteeMapped=*/HasMapBasePtr && HasMapArraySec);
9397
9398 // If there is an entry in PartialStruct it means we have a
9399 // struct with individual members mapped. Emit an extra combined
9400 // entry.
9401 if (PartialStruct.Base.isValid()) {
9402 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
9403 emitCombinedEntry(
9404 CurCaptureVarInfo, CurInfoForComponentLists.Types,
9405 PartialStruct, Cap->capturesThis(), OMPBuilder, nullptr,
9406 OffsetForMemberOfFlag,
9407 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
9408 }
9409
9410 // Return if we didn't add any entries.
9411 if (CurInfoForComponentLists.BasePointers.empty())
9412 return;
9413
9414 CurCaptureVarInfo.append(CurInfoForComponentLists);
9415 };
9416
9417 GenerateInfoForComponentLists(DeclComponentLists,
9418 /*IsEligibleForTargetParamFlag=*/true);
9419 }
9420
9421 /// Generate the base pointers, section pointers, sizes, map types, and
9422 /// mappers associated to \a DeclComponentLists for a given capture
9423 /// \a VD (all included in \a CurComponentListInfo).
9424 void generateInfoForCaptureFromComponentLists(
9425 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
9426 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
9427 bool IsListEligibleForTargetParamFlag,
9428 bool AreBothBasePtrAndPteeMapped = false) const {
9429 // Find overlapping elements (including the offset from the base element).
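// E.g. (illustrative): for 'map(tofrom: s) map(to: s.x)', walking both
// component lists from their bases reaches the head of the list for 's',
// so the components of 's.x' are recorded as overlapping the map of 's'.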
9430 llvm::SmallDenseMap<
9431 const MapData *,
9432 llvm::SmallVector<
9433 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9434 4>
9435 OverlappedData;
9436 size_t Count = 0;
9437 for (const MapData &L : DeclComponentLists) {
9438 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9439 OpenMPMapClauseKind MapType;
9440 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9441 bool IsImplicit;
9442 const ValueDecl *Mapper;
9443 const Expr *VarRef;
9444 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9445 L;
9446 ++Count;
9447 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
9448 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9449 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9450 VarRef) = L1;
9451 auto CI = Components.rbegin();
9452 auto CE = Components.rend();
9453 auto SI = Components1.rbegin();
9454 auto SE = Components1.rend();
9455 for (; CI != CE && SI != SE; ++CI, ++SI) {
9456 if (CI->getAssociatedExpression()->getStmtClass() !=
9457 SI->getAssociatedExpression()->getStmtClass())
9458 break;
9459 // Are we dealing with different variables/fields?
9460 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9461 break;
9462 }
9463 // Found overlapping if, at least for one component, reached the head
9464 // of the components list.
9465 if (CI == CE || SI == SE) {
9466 // Ignore it if it is the same component.
9467 if (CI == CE && SI == SE)
9468 continue;
9469 const auto It = (SI == SE) ? CI : SI;
9470 // If one component is a pointer and another one is a kind of
9471 // dereference of this pointer (array subscript, section, dereference,
9472 // etc.), it is not an overlap.
9473 // Likewise, it is not an overlap if one component is a base and another
9474 // component is a dereferenced pointer memberexpr with the same base.
9475 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9476 (std::prev(It)->getAssociatedDeclaration() &&
9477 std::prev(It)
9478 ->getAssociatedDeclaration()
9479 ->getType()
9480 ->isPointerType()) ||
9481 (It->getAssociatedDeclaration() &&
9482 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9483 std::next(It) != CE && std::next(It) != SE))
9484 continue;
9485 const MapData &BaseData = CI == CE ? L : L1;
9486 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9487 SI == SE ? Components : Components1;
9488 OverlappedData[&BaseData].push_back(SubData);
9489 }
9490 }
9491 }
9492 // Sort the overlapped elements for each item.
9493 llvm::SmallVector<const FieldDecl *, 4> Layout;
9494 if (!OverlappedData.empty()) {
9495 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9496 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9497 while (BaseType != OrigType) {
9498 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9499 OrigType = BaseType->getPointeeOrArrayElementType();
9500 }
9501
9502 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9503 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9504 else {
9505 const auto *RD = BaseType->getAsRecordDecl();
9506 Layout.append(RD->field_begin(), RD->field_end());
9507 }
9508 }
9509 for (auto &Pair : OverlappedData) {
9510 llvm::stable_sort(
9511 Pair.getSecond(),
9512 [&Layout](
9513 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9514 OMPClauseMappableExprCommon::MappableExprComponentListRef
9515 Second) {
9516 auto CI = First.rbegin();
9517 auto CE = First.rend();
9518 auto SI = Second.rbegin();
9519 auto SE = Second.rend();
9520 for (; CI != CE && SI != SE; ++CI, ++SI) {
9521 if (CI->getAssociatedExpression()->getStmtClass() !=
9522 SI->getAssociatedExpression()->getStmtClass())
9523 break;
9524 // Are we dealing with different variables/fields?
9525 if (CI->getAssociatedDeclaration() !=
9526 SI->getAssociatedDeclaration())
9527 break;
9528 }
9529
9530 // Lists contain the same elements.
9531 if (CI == CE && SI == SE)
9532 return false;
9533
9534 // A list with fewer elements is less than a list with more elements.
9535 if (CI == CE || SI == SE)
9536 return CI == CE;
9537
9538 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9539 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9540 if (FD1->getParent() == FD2->getParent())
9541 return FD1->getFieldIndex() < FD2->getFieldIndex();
9542 const auto *It =
9543 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9544 return FD == FD1 || FD == FD2;
9545 });
9546 return *It == FD1;
9547 });
9548 }
9549
9550 // Associated with a capture, because the mapping flags depend on it.
9551 // First, go through all of the elements that have overlapped elements.
9552 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
9553 MapCombinedInfoTy StructBaseCombinedInfo;
9554 for (const auto &Pair : OverlappedData) {
9555 const MapData &L = *Pair.getFirst();
9556 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9557 OpenMPMapClauseKind MapType;
9558 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9559 bool IsImplicit;
9560 const ValueDecl *Mapper;
9561 const Expr *VarRef;
9562 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9563 L;
9564 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9565 OverlappedComponents = Pair.getSecond();
9566 generateInfoForComponentList(
9567 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9568 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag, IsImplicit,
9569 /*GenerateAllInfoForClauses*/ false, Mapper,
9570 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9571 AddTargetParamFlag = false;
9572 }
9573 // Go through other elements without overlapped elements.
9574 for (const MapData &L : DeclComponentLists) {
9575 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9576 OpenMPMapClauseKind MapType;
9577 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9578 bool IsImplicit;
9579 const ValueDecl *Mapper;
9580 const Expr *VarRef;
9581 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9582 L;
9583 auto It = OverlappedData.find(&L);
9584 if (It == OverlappedData.end())
9585 generateInfoForComponentList(
9586 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9587 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag,
9588 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
9589 /*ForDeviceAddr=*/false, VD, VarRef,
9590 /*OverlappedElements*/ {}, AreBothBasePtrAndPteeMapped);
9591 AddTargetParamFlag = false;
9592 }
9593 }
9594
9595 /// Check if a variable should be treated as firstprivate due to explicit
9596 /// firstprivate clause or defaultmap(firstprivate:...).
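/// For example (illustrative only):
/// \code
/// int s = 0;
/// #pragma omp target defaultmap(firstprivate: scalar)
/// use(s);
/// \endcode
/// treats 's' as firstprivate although no explicit firstprivate clause
/// names it.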
9597 bool isEffectivelyFirstprivate(const VarDecl *VD, QualType Type) const {
9598 // Check explicit firstprivate clauses (not implicit from defaultmap)
9599 auto I = FirstPrivateDecls.find(VD);
9600 if (I != FirstPrivateDecls.end() && !I->getSecond())
9601 return true; // Explicit firstprivate only
9602
9603 // Check defaultmap(firstprivate:scalar) for scalar types
9604 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_scalar)) {
9605 if (Type->isScalarType())
9606 return true;
9607 }
9608
9609 // Check defaultmap(firstprivate:pointer) for pointer types
9610 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_pointer)) {
9611 if (Type->isAnyPointerType())
9612 return true;
9613 }
9614
9615 // Check defaultmap(firstprivate:aggregate) for aggregate types
9616 if (DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_aggregate)) {
9617 if (Type->isAggregateType())
9618 return true;
9619 }
9620
9621 // Check defaultmap(firstprivate:all) for all types
9622 return DefaultmapFirstprivateKinds.count(OMPC_DEFAULTMAP_all);
9623 }
9624
9625 /// Generate the default map information for a given capture \a CI,
9626 /// record field declaration \a RI and captured value \a CV.
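/// For instance (illustrative only), with no map clauses at all:
/// \code
/// struct T t; int n = 4;
/// #pragma omp target
/// use(t, n);
/// \endcode
/// 't' (captured by reference) gets an implicit 'tofrom' map, while the
/// by-copy scalar 'n' is passed as a literal; both entries also carry the
/// target-param and implicit flags.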
9627 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9628 const FieldDecl &RI, llvm::Value *CV,
9629 MapCombinedInfoTy &CombinedInfo) const {
9630 bool IsImplicit = true;
9631 // Do the default mapping.
9632 if (CI.capturesThis()) {
9633 CombinedInfo.Exprs.push_back(nullptr);
9634 CombinedInfo.BasePointers.push_back(CV);
9635 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9636 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9637 CombinedInfo.Pointers.push_back(CV);
9638 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9639 CombinedInfo.Sizes.push_back(
9640 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9641 CGF.Int64Ty, /*isSigned=*/true));
9642 // Default map type.
9643 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
9644 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
9645 } else if (CI.capturesVariableByCopy()) {
9646 const VarDecl *VD = CI.getCapturedVar();
9647 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9648 CombinedInfo.BasePointers.push_back(CV);
9649 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9650 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9651 CombinedInfo.Pointers.push_back(CV);
9652 bool IsFirstprivate =
9653 isEffectivelyFirstprivate(VD, RI.getType().getNonReferenceType());
9654
9655 if (!RI.getType()->isAnyPointerType()) {
9656 // We have to signal to the runtime captures passed by value that are
9657 // not pointers.
9658 CombinedInfo.Types.push_back(
9659 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
9660 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9661 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9662 } else if (IsFirstprivate) {
9663 // Firstprivate pointers should be passed by value (as literals)
9664 // without performing a present table lookup at runtime.
9665 CombinedInfo.Types.push_back(
9666 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
9667 // Use zero size for pointer literals (just passing the pointer value)
9668 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9669 } else {
9670 // Pointers are implicitly mapped with a zero size and no flags
9671 // (other than first map that is added for all implicit maps).
9672 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
9673 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9674 }
9675 auto I = FirstPrivateDecls.find(VD);
9676 if (I != FirstPrivateDecls.end())
9677 IsImplicit = I->getSecond();
9678 } else {
9679 assert(CI.capturesVariable() && "Expected captured reference.");
9680 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9681 QualType ElementType = PtrTy->getPointeeType();
9682 const VarDecl *VD = CI.getCapturedVar();
9683 bool IsFirstprivate = isEffectivelyFirstprivate(VD, ElementType);
9684 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9685 CombinedInfo.BasePointers.push_back(CV);
9686 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9687 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9688
9689 // For firstprivate pointers, pass by value instead of dereferencing
9690 if (IsFirstprivate && ElementType->isAnyPointerType()) {
9691 // Treat as a literal value (pass the pointer value itself)
9692 CombinedInfo.Pointers.push_back(CV);
9693 // Use zero size for pointer literals
9694 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9695 CombinedInfo.Types.push_back(
9696 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
9697 } else {
9698 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9699 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9700 // The default map type for a scalar/complex type is 'to' because by
9701 // default the value doesn't have to be retrieved. For an aggregate
9702 // type, the default is 'tofrom'.
9703 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9704 CombinedInfo.Pointers.push_back(CV);
9705 }
9706 auto I = FirstPrivateDecls.find(VD);
9707 if (I != FirstPrivateDecls.end())
9708 IsImplicit = I->getSecond();
9709 }
9710 // Every default map produces a single argument which is a target parameter.
9711 CombinedInfo.Types.back() |=
9712 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9713
9714 // Add flag stating this is an implicit map.
9715 if (IsImplicit)
9716 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
9717
9718 // No user-defined mapper for default mapping.
9719 CombinedInfo.Mappers.push_back(nullptr);
9720 }
9721};
9722} // anonymous namespace
9723
9724// Try to extract the base declaration from a `this->x` expression if possible.
9725static const ValueDecl *getDeclFromThisExpr(const Expr *E) {
9726 if (!E)
9727 return nullptr;
9728
9729 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
9730 if (const MemberExpr *ME =
9731 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9732 return ME->getMemberDecl();
9733 return nullptr;
9734}
9735
9736/// Emit a string constant containing the names of the values mapped to the
9737/// offloading runtime library.
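/// E.g. (illustrative): for 'map(tofrom: a[0:n])' the string records the
/// pretty-printed expression 'a[0:n]' together with the presumed file name,
/// line, and column of the clause.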
9738static llvm::Constant *
9739emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9740 MappableExprsHandler::MappingExprInfo &MapExprs) {
9741
9742 uint32_t SrcLocStrSize;
9743 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9744 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9745
9746 SourceLocation Loc;
9747 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9748 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9749 Loc = VD->getLocation();
9750 else
9751 Loc = MapExprs.getMapExpr()->getExprLoc();
9752 } else {
9753 Loc = MapExprs.getMapDecl()->getLocation();
9754 }
9755
9756 std::string ExprName;
9757 if (MapExprs.getMapExpr()) {
9758 PrintingPolicy P(CGF.getContext().getLangOpts());
9759 llvm::raw_string_ostream OS(ExprName);
9760 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9761 } else {
9762 ExprName = MapExprs.getMapDecl()->getNameAsString();
9763 }
9764
9765 std::string FileName;
9766 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9767 if (auto *DbgInfo = CGF.getDebugInfo())
9768 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
9769 else
9770 FileName = PLoc.getFilename();
9771 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
9772 PLoc.getColumn(), SrcLocStrSize);
9773}
9774/// Emit the arrays used to pass the captures and map information to the
9775/// offloading runtime library. If there is no map or capture information,
9776/// return nullptr by reference.
9777static void emitOffloadingArraysAndArgs(
9778 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9779 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9780 bool IsNonContiguous = false, bool ForEndCall = false) {
9781 CodeGenModule &CGM = CGF.CGM;
9782
9783 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
9784 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
9785 CGF.AllocaInsertPt->getIterator());
9786 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
9787 CGF.Builder.GetInsertPoint());
9788
9789 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
9790 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
9791 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
9792 }
9793 };
9794
9795 auto CustomMapperCB = [&](unsigned int I) {
9796 llvm::Function *MFunc = nullptr;
9797 if (CombinedInfo.Mappers[I]) {
9798 Info.HasMapper = true;
9799 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9800 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9801 }
9802 return MFunc;
9803 };
9804 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
9805 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
9806 IsNonContiguous, ForEndCall, DeviceAddrCB));
9807}
9808
9809/// Check for inner distribute directive.
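/// E.g. (illustrative): for '#pragma omp target' whose body consists solely
/// of '#pragma omp teams distribute parallel for', this returns the nested
/// distribute directive.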
9810static const OMPExecutableDirective *
9811getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9812 const auto *CS = D.getInnermostCapturedStmt();
9813 const auto *Body =
9814 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9815 const Stmt *ChildStmt =
9816 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9817
9818 if (const auto *NestedDir =
9819 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9820 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9821 switch (D.getDirectiveKind()) {
9822 case OMPD_target:
9823 // For now, treat 'target' with nested 'teams loop' as if it's
9824 // distributed (target teams distribute).
9825 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
9826 return NestedDir;
9827 if (DKind == OMPD_teams) {
9828 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9829 /*IgnoreCaptured=*/true);
9830 if (!Body)
9831 return nullptr;
9832 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9833 if (const auto *NND =
9834 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9835 DKind = NND->getDirectiveKind();
9836 if (isOpenMPDistributeDirective(DKind))
9837 return NND;
9838 }
9839 }
9840 return nullptr;
9841 case OMPD_target_teams:
9842 if (isOpenMPDistributeDirective(DKind))
9843 return NestedDir;
9844 return nullptr;
9845 case OMPD_target_parallel:
9846 case OMPD_target_simd:
9847 case OMPD_target_parallel_for:
9848 case OMPD_target_parallel_for_simd:
9849 return nullptr;
9850 case OMPD_target_teams_distribute:
9851 case OMPD_target_teams_distribute_simd:
9852 case OMPD_target_teams_distribute_parallel_for:
9853 case OMPD_target_teams_distribute_parallel_for_simd:
9854 case OMPD_parallel:
9855 case OMPD_for:
9856 case OMPD_parallel_for:
9857 case OMPD_parallel_master:
9858 case OMPD_parallel_sections:
9859 case OMPD_for_simd:
9860 case OMPD_parallel_for_simd:
9861 case OMPD_cancel:
9862 case OMPD_cancellation_point:
9863 case OMPD_ordered:
9864 case OMPD_threadprivate:
9865 case OMPD_allocate:
9866 case OMPD_task:
9867 case OMPD_simd:
9868 case OMPD_tile:
9869 case OMPD_unroll:
9870 case OMPD_sections:
9871 case OMPD_section:
9872 case OMPD_single:
9873 case OMPD_master:
9874 case OMPD_critical:
9875 case OMPD_taskyield:
9876 case OMPD_barrier:
9877 case OMPD_taskwait:
9878 case OMPD_taskgroup:
9879 case OMPD_atomic:
9880 case OMPD_flush:
9881 case OMPD_depobj:
9882 case OMPD_scan:
9883 case OMPD_teams:
9884 case OMPD_target_data:
9885 case OMPD_target_exit_data:
9886 case OMPD_target_enter_data:
9887 case OMPD_distribute:
9888 case OMPD_distribute_simd:
9889 case OMPD_distribute_parallel_for:
9890 case OMPD_distribute_parallel_for_simd:
9891 case OMPD_teams_distribute:
9892 case OMPD_teams_distribute_simd:
9893 case OMPD_teams_distribute_parallel_for:
9894 case OMPD_teams_distribute_parallel_for_simd:
9895 case OMPD_target_update:
9896 case OMPD_declare_simd:
9897 case OMPD_declare_variant:
9898 case OMPD_begin_declare_variant:
9899 case OMPD_end_declare_variant:
9900 case OMPD_declare_target:
9901 case OMPD_end_declare_target:
9902 case OMPD_declare_reduction:
9903 case OMPD_declare_mapper:
9904 case OMPD_taskloop:
9905 case OMPD_taskloop_simd:
9906 case OMPD_master_taskloop:
9907 case OMPD_master_taskloop_simd:
9908 case OMPD_parallel_master_taskloop:
9909 case OMPD_parallel_master_taskloop_simd:
9910 case OMPD_requires:
9911 case OMPD_metadirective:
9912 case OMPD_unknown:
9913 default:
9914 llvm_unreachable("Unexpected directive.");
9915 }
9916 }
9917
9918 return nullptr;
9919}
9920
9921/// Emit the user-defined mapper function. The code generation follows the
9922/// pattern in the example below.
9923/// \code
9924/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9925/// void *base, void *begin,
9926/// int64_t size, int64_t type,
9927/// void *name = nullptr) {
9928/// // Allocate space for an array section first or add a base/begin for
9929/// // pointer dereference.
9930/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9931/// !maptype.IsDelete)
9932/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9933/// size*sizeof(Ty), clearToFromMember(type));
9934/// // Map members.
9935/// for (unsigned i = 0; i < size; i++) {
9936/// // For each component specified by this mapper:
9937/// for (auto c : begin[i]->all_components) {
9938/// if (c.hasMapper())
9939/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9940/// c.arg_type, c.arg_name);
9941/// else
9942/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9943/// c.arg_begin, c.arg_size, c.arg_type,
9944/// c.arg_name);
9945/// }
9946/// }
9947/// // Delete the array section.
9948/// if (size > 1 && maptype.IsDelete)
9949/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9950/// size*sizeof(Ty), clearToFromMember(type));
9951/// }
9952/// \endcode
9953void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9954 CodeGenFunction *CGF) {
9955 if (UDMMap.count(D) > 0)
9956 return;
9957 ASTContext &C = CGM.getContext();
9958 QualType Ty = D->getType();
9959 auto *MapperVarDecl =
9960 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9961 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9962 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9963
9964 CodeGenFunction MapperCGF(CGM);
9965 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9966 auto PrivatizeAndGenMapInfoCB =
9967 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
9968 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
9969 MapperCGF.Builder.restoreIP(CodeGenIP);
9970
9971 // Privatize the declared variable of mapper to be the current array
9972 // element.
9973 Address PtrCurrent(
9974 PtrPHI, ElemTy,
9975 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
9976 .getAlignment()
9977 .alignmentOfArrayElement(ElementSize));
9978 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9979 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9980 (void)Scope.Privatize();
9981
9982 // Get map clause information.
9983 MappableExprsHandler MEHandler(*D, MapperCGF);
9984 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
9985
9986 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9987 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
9988 };
9989 if (CGM.getCodeGenOpts().getDebugInfo() !=
9990 llvm::codegenoptions::NoDebugInfo) {
9991 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9992 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9993 FillInfoMap);
9994 }
9995
9996 return CombinedInfo;
9997 };
9998
9999 auto CustomMapperCB = [&](unsigned I) {
10000 llvm::Function *MapperFunc = nullptr;
10001 if (CombinedInfo.Mappers[I]) {
10002 // Call the corresponding mapper function.
10003 MapperFunc = getOrCreateUserDefinedMapperFunc(
10004 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10005 assert(MapperFunc && "Expected a valid mapper function to be available.");
10006 }
10007 return MapperFunc;
10008 };
10009
10010 SmallString<64> TyStr;
10011 llvm::raw_svector_ostream Out(TyStr);
10012 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
10013 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
10014
10015 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
10016 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
10017 UDMMap.try_emplace(D, NewFn);
10018 if (CGF)
10019 FunctionUDMMap[CGF->CurFn].push_back(D);
10020}
10021
10022llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10023 const OMPDeclareMapperDecl *D) {
10024 auto I = UDMMap.find(D);
10025 if (I != UDMMap.end())
10026 return I->second;
10027 emitUserDefinedMapper(D);
10028 return UDMMap.lookup(D);
10029}
10030
10031llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
10032 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10033 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10034 const OMPLoopDirective &D)>
10035 SizeEmitter) {
10036 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10037 const OMPExecutableDirective *TD = &D;
10038 // Get nested teams distribute kind directive, if any. For now, treat
10039 // 'target_teams_loop' as if it's really a target_teams_distribute.
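// E.g. (illustrative): '#pragma omp target teams distribute parallel for'
// carries the loop itself, whereas a bare '#pragma omp target' requires
// locating a nested distribute construct to compute the trip count.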
10040 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
10041 Kind != OMPD_target_teams_loop)
10042 TD = getNestedDistributeDirective(CGM.getContext(), D);
10043 if (!TD)
10044 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10045
10046 const auto *LD = cast<OMPLoopDirective>(TD);
10047 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10048 return NumIterations;
10049 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10050}
10051
10052static void
10053emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10054 const OMPExecutableDirective &D,
10055 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10056 bool RequiresOuterTask, const CapturedStmt &CS,
10057 bool OffloadingMandatory, CodeGenFunction &CGF) {
10058 if (OffloadingMandatory) {
10059 CGF.Builder.CreateUnreachable();
10060 } else {
10061 if (RequiresOuterTask) {
10062 CapturedVars.clear();
10063 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10064 }
10065 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
10066 CapturedVars);
10067 }
10068}
10069
10070static llvm::Value *emitDeviceID(
10071 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10072 CodeGenFunction &CGF) {
10073 // Emit device ID if any.
10074 llvm::Value *DeviceID;
10075 if (Device.getPointer()) {
10076 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10077 Device.getInt() == OMPC_DEVICE_device_num) &&
10078 "Expected device_num modifier.");
10079 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10080 DeviceID =
10081 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10082 } else {
10083 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10084 }
10085 return DeviceID;
10086}
10087
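// A dyn_groupprivate clause selects both the dynamic group-private memory
// size and the fallback behavior used when that much memory is unavailable,
// e.g. (illustrative syntax):
//   #pragma omp target teams dyn_groupprivate(fallback(null): nbytes)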
10088static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
10089emitDynGroupprivate(const OMPExecutableDirective &D, CodeGenFunction &CGF) {
10090 llvm::Value *DynGP = CGF.Builder.getInt32(0);
10091 auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10092
10093 if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
10094 CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
10095 llvm::Value *DynGPVal =
10096 CGF.EmitScalarExpr(DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
10097 DynGP = CGF.Builder.CreateIntCast(DynGPVal, CGF.Int32Ty,
10098 /*isSigned=*/false);
10099 auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
10100 switch (FallbackModifier) {
10101 case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
10102 DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10103 break;
10104 case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
10105 DynGPFallback = OMPDynGroupprivateFallbackType::Null;
10106 break;
10107 case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
10108 case OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown:
10109 // This is the default for dyn_groupprivate.
10110 DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
10111 break;
10112 default:
10113 llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
10114 }
10115 } else if (auto *OMPXDynCGClause =
10116 D.getSingleClause<OMPXDynCGroupMemClause>()) {
10117 CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
10118 llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(OMPXDynCGClause->getSize(),
10119 /*IgnoreResultAssign=*/true);
10120 DynGP = CGF.Builder.CreateIntCast(DynCGMemVal, CGF.Int32Ty,
10121 /*isSigned=*/false);
10122 }
10123 return {DynGP, DynGPFallback};
10124}
10125
10126static void genMapInfoForCaptures(
10127 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10128 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10129 llvm::OpenMPIRBuilder &OMPBuilder,
10130 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
10131 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10132
10133 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10134 auto RI = CS.getCapturedRecordDecl()->field_begin();
10135 auto *CV = CapturedVars.begin();
10136 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10137 CE = CS.capture_end();
10138 CI != CE; ++CI, ++RI, ++CV) {
10139 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10140
10141 // VLA sizes are passed to the outlined region by copy and do not have map
10142 // information associated.
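// E.g. (illustrative): for 'int n; int a[n];' used inside a target region,
// the value of 'n' backing the VLA type is one such by-copy size argument.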
10143 if (CI->capturesVariableArrayType()) {
10144 CurInfo.Exprs.push_back(nullptr);
10145 CurInfo.BasePointers.push_back(*CV);
10146 CurInfo.DevicePtrDecls.push_back(nullptr);
10147 CurInfo.DevicePointers.push_back(
10148 MappableExprsHandler::DeviceInfoTy::None);
10149 CurInfo.Pointers.push_back(*CV);
10150 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10151 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10152 // Copy to the device as an argument. No need to retrieve it.
10153 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10154 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10155 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10156 CurInfo.Mappers.push_back(nullptr);
10157 } else {
10158 // If we have any information in the map clause, we use it, otherwise we
10159 // just do a default mapping.
10160 MEHandler.generateInfoForCaptureFromClauseInfo(
10161 CI, *CV, CurInfo, OMPBuilder,
10162 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
10163
10164 if (!CI->capturesThis())
10165 MappedVarSet.insert(CI->getCapturedVar());
10166 else
10167 MappedVarSet.insert(nullptr);
10168
10169 if (CurInfo.BasePointers.empty())
10170 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10171
10172 // Generate correct mapping for variables captured by reference in
10173 // lambdas.
10174 if (CI->capturesVariable())
10175 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10176 CurInfo, LambdaPointers);
10177 }
10178 // We expect to have at least an element of information for this capture.
10179 assert(!CurInfo.BasePointers.empty() &&
10180 "Non-existing map pointer for capture!");
10181 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10182 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10183 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10184 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10185 "Inconsistent map information sizes!");
10186
10187 // We need to append the results of this capture to what we already have.
10188 CombinedInfo.append(CurInfo);
10189 }
10190 // Adjust the MEMBER_OF flags for the lambda captures.
10191 MEHandler.adjustMemberOfForLambdaCaptures(
10192 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
10193 CombinedInfo.Pointers, CombinedInfo.Types);
10194}
10195static void
10196genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10197 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10198 llvm::OpenMPIRBuilder &OMPBuilder,
10199 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10200 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10201
10202 CodeGenModule &CGM = CGF.CGM;
10203 // Map any list items in a map clause that were not captures because they
10204 // weren't referenced within the construct.
10205 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
10206
10207 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10208 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10209 };
10210 if (CGM.getCodeGenOpts().getDebugInfo() !=
10211 llvm::codegenoptions::NoDebugInfo) {
10212 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10213 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10214 FillInfoMap);
10215 }
10216}
10217
10218static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
10219 const CapturedStmt &CS,
10220 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10221 llvm::OpenMPIRBuilder &OMPBuilder,
10222 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10223 // Get mappable expression information.
10224 MappableExprsHandler MEHandler(D, CGF);
10225 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10226
10227 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10228 MappedVarSet, CombinedInfo);
10229 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
10230}
10231
10232template <typename ClauseTy>
10233static void
10234 emitClauseForBareTargetDirective(CodeGenFunction &CGF,
10235 const OMPExecutableDirective &D,
10236 llvm::SmallVectorImpl<llvm::Value *> &Values) {
10237 const auto *C = D.getSingleClause<ClauseTy>();
10238 assert(!C->varlist_empty() &&
10239 "ompx_bare requires explicit num_teams and thread_limit");
10240 CodeGenFunction::RunCleanupsScope Scope(CGF);
10241 for (auto *E : C->varlist()) {
10242 llvm::Value *V = CGF.EmitScalarExpr(E);
10243 Values.push_back(
10244 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
10245 }
10246}
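// Editorial illustration: a bare target region such as
//   #pragma omp target teams ompx_bare num_teams(64) thread_limit(128)
// must carry both clauses explicitly (see the assert above); each clause
// expression is evaluated and truncated to i32 for the kernel launch.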
10247
10248 static void emitTargetCallKernelLaunch(
10249 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10250 const OMPExecutableDirective &D,
10251 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10252 const CapturedStmt &CS, bool OffloadingMandatory,
10253 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10254 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10255 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10256 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10257 const OMPLoopDirective &D)>
10258 SizeEmitter,
10259 CodeGenFunction &CGF, CodeGenModule &CGM) {
10260 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10261
10262 // Fill up the arrays with all the captured variables.
10263 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10264 llvm::OpenMPIRBuilder::TargetDataInfo Info;
10265 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10266
10267 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10268 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10269
10270 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10271 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10272 CGF.VoidPtrTy, CGM.getPointerAlign());
10273 InputInfo.PointersArray =
10274 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10275 InputInfo.SizesArray =
10276 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10277 InputInfo.MappersArray =
10278 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10279 MapTypesArray = Info.RTArgs.MapTypesArray;
10280 MapNamesArray = Info.RTArgs.MapNamesArray;
10281
10282 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10283 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10284 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10285 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10286 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10287
10288 if (IsReverseOffloading) {
10289 // Reverse offloading is not supported, so just execute on the host.
10290 // FIXME: This fallback solution is incorrect since it ignores the
10291 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10292 // assert here and ensure SEMA emits an error.
10293 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10294 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10295 return;
10296 }
10297
10298 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10299 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10300
10301 llvm::Value *BasePointersArray =
10302 InputInfo.BasePointersArray.emitRawPointer(CGF);
10303 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10304 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10305 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10306
10307 auto &&EmitTargetCallFallbackCB =
10308 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10309 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10310 -> llvm::OpenMPIRBuilder::InsertPointTy {
10311 CGF.Builder.restoreIP(IP);
10312 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10313 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10314 return CGF.Builder.saveIP();
10315 };
10316
10317 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10318 SmallVector<llvm::Value *, 3> NumTeams;
10319 SmallVector<llvm::Value *, 3> NumThreads;
10320 if (IsBare) {
10321 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
10322 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
10323 NumThreads);
10324 } else {
10325 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10326 NumThreads.push_back(
10327 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10328 }
10329
10330 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10331 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
10332 llvm::Value *NumIterations =
10333 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10334 auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
10335 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10336 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10337
10338 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10339 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10340 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10341
10342 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
10343 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
10344 DynCGroupMem, HasNoWait, DynCGroupMemFallback);
10345
10346 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10347 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
10348 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
10349 RTLoc, AllocaIP));
10350 CGF.Builder.restoreIP(AfterIP);
10351 };
10352
10353 if (RequiresOuterTask)
10354 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10355 else
10356 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10357}
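// Editorial sketch of the control flow produced by emitKernelLaunch above
// (simplified; the exact argument list is elided):
//   %rc = call i32 @__tgt_target_kernel(ptr @loc, i64 %device_id, ...,
//                                       ptr %kernel_args)
//   ; a non-zero %rc branches to the host fallback emitted by
//   ; EmitTargetCallFallbackCB; otherwise execution continues after the
//   ; offloaded region.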
10358
10359static void
10360emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10361 const OMPExecutableDirective &D,
10362 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10363 bool RequiresOuterTask, const CapturedStmt &CS,
10364 bool OffloadingMandatory, CodeGenFunction &CGF) {
10365
10366 // Notify that the host version must be executed.
10367 auto &&ElseGen =
10368 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10369 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10370 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10371 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10372 };
10373
10374 if (RequiresOuterTask) {
10375 CodeGenFunction::OMPTargetDataInfo InputInfo;
10376 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10377 } else {
10378 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10379 }
10380}
10381
10382 void CGOpenMPRuntime::emitTargetCall(
10383 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10384 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10385 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10386 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10387 const OMPLoopDirective &D)>
10388 SizeEmitter) {
10389 if (!CGF.HaveInsertPoint())
10390 return;
10391
10392 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
10393 CGM.getLangOpts().OpenMPOffloadMandatory;
10394
10395 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10396
10397 const bool RequiresOuterTask =
10398 D.hasClausesOfKind<OMPDependClause>() ||
10399 D.hasClausesOfKind<OMPNowaitClause>() ||
10400 D.hasClausesOfKind<OMPInReductionClause>() ||
10401 (CGM.getLangOpts().OpenMP >= 51 &&
10402 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
10403 D.hasClausesOfKind<OMPThreadLimitClause>());
10404 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10405 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10406 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10407 PrePostActionTy &) {
10408 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10409 };
10410 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10411
10412 CodeGenFunction::OMPTargetDataInfo InputInfo;
10413 llvm::Value *MapTypesArray = nullptr;
10414 llvm::Value *MapNamesArray = nullptr;
10415
10416 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
10417 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10418 OutlinedFnID, &InputInfo, &MapTypesArray,
10419 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
10420 PrePostActionTy &) {
10421 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
10422 RequiresOuterTask, CS, OffloadingMandatory,
10423 Device, OutlinedFnID, InputInfo, MapTypesArray,
10424 MapNamesArray, SizeEmitter, CGF, CGM);
10425 };
10426
10427 auto &&TargetElseGen =
10428 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10429 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10430 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
10431 CS, OffloadingMandatory, CGF);
10432 };
10433
10434 // If we have a target function ID it means that we need to support
10435 // offloading; otherwise, just execute on the host. We need to execute on the
10436 // host regardless of the conditional in the if clause if, e.g., the user does
10437 // not specify target triples.
10438 if (OutlinedFnID) {
10439 if (IfCond) {
10440 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10441 } else {
10442 RegionCodeGenTy ThenRCG(TargetThenGen);
10443 ThenRCG(CGF);
10444 }
10445 } else {
10446 RegionCodeGenTy ElseRCG(TargetElseGen);
10447 ElseRCG(CGF);
10448 }
10449}
10450
10451 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10452 StringRef ParentName) {
10453 if (!S)
10454 return;
10455
10456 // Codegen OMP target directives that offload compute to the device.
10457 bool RequiresDeviceCodegen =
10458 isa<OMPExecutableDirective>(S) &&
10459 isOpenMPTargetExecutionDirective(
10460 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10461
10462 if (RequiresDeviceCodegen) {
10463 const auto &E = *cast<OMPExecutableDirective>(S);
10464
10465 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
10466 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
10467
10468 // Is this a target region that should not be emitted as an entry point? If
10469 // so, just signal that we are done with this target region.
10470 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
10471 return;
10472
10473 switch (E.getDirectiveKind()) {
10474 case OMPD_target:
10475 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10476 cast<OMPTargetDirective>(E));
10477 break;
10478 case OMPD_target_parallel:
10479 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10480 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10481 break;
10482 case OMPD_target_teams:
10483 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10484 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10485 break;
10486 case OMPD_target_teams_distribute:
10487 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10488 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10489 break;
10490 case OMPD_target_teams_distribute_simd:
10491 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10492 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10493 break;
10494 case OMPD_target_parallel_for:
10495 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10496 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10497 break;
10498 case OMPD_target_parallel_for_simd:
10499 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10500 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10501 break;
10502 case OMPD_target_simd:
10503 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10504 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10505 break;
10506 case OMPD_target_teams_distribute_parallel_for:
10507 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10508 CGM, ParentName,
10509 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10510 break;
10511 case OMPD_target_teams_distribute_parallel_for_simd:
10512 CodeGenFunction::
10513 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10514 CGM, ParentName,
10515 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10516 break;
10517 case OMPD_target_teams_loop:
10518 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
10519 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
10520 break;
10521 case OMPD_target_parallel_loop:
10522 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
10523 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
10524 break;
10525 case OMPD_parallel:
10526 case OMPD_for:
10527 case OMPD_parallel_for:
10528 case OMPD_parallel_master:
10529 case OMPD_parallel_sections:
10530 case OMPD_for_simd:
10531 case OMPD_parallel_for_simd:
10532 case OMPD_cancel:
10533 case OMPD_cancellation_point:
10534 case OMPD_ordered:
10535 case OMPD_threadprivate:
10536 case OMPD_allocate:
10537 case OMPD_task:
10538 case OMPD_simd:
10539 case OMPD_tile:
10540 case OMPD_unroll:
10541 case OMPD_sections:
10542 case OMPD_section:
10543 case OMPD_single:
10544 case OMPD_master:
10545 case OMPD_critical:
10546 case OMPD_taskyield:
10547 case OMPD_barrier:
10548 case OMPD_taskwait:
10549 case OMPD_taskgroup:
10550 case OMPD_atomic:
10551 case OMPD_flush:
10552 case OMPD_depobj:
10553 case OMPD_scan:
10554 case OMPD_teams:
10555 case OMPD_target_data:
10556 case OMPD_target_exit_data:
10557 case OMPD_target_enter_data:
10558 case OMPD_distribute:
10559 case OMPD_distribute_simd:
10560 case OMPD_distribute_parallel_for:
10561 case OMPD_distribute_parallel_for_simd:
10562 case OMPD_teams_distribute:
10563 case OMPD_teams_distribute_simd:
10564 case OMPD_teams_distribute_parallel_for:
10565 case OMPD_teams_distribute_parallel_for_simd:
10566 case OMPD_target_update:
10567 case OMPD_declare_simd:
10568 case OMPD_declare_variant:
10569 case OMPD_begin_declare_variant:
10570 case OMPD_end_declare_variant:
10571 case OMPD_declare_target:
10572 case OMPD_end_declare_target:
10573 case OMPD_declare_reduction:
10574 case OMPD_declare_mapper:
10575 case OMPD_taskloop:
10576 case OMPD_taskloop_simd:
10577 case OMPD_master_taskloop:
10578 case OMPD_master_taskloop_simd:
10579 case OMPD_parallel_master_taskloop:
10580 case OMPD_parallel_master_taskloop_simd:
10581 case OMPD_requires:
10582 case OMPD_metadirective:
10583 case OMPD_unknown:
10584 default:
10585 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10586 }
10587 return;
10588 }
10589
10590 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10591 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10592 return;
10593
10594 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10595 return;
10596 }
10597
10598 // If this is a lambda function, look into its body.
10599 if (const auto *L = dyn_cast<LambdaExpr>(S))
10600 S = L->getBody();
10601
10602 // Keep looking for target regions recursively.
10603 for (const Stmt *II : S->children())
10604 scanForTargetRegionsFunctions(II, ParentName);
10605}
10606
10607static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10608 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10609 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10610 if (!DevTy)
10611 return false;
10612 // Do not emit device_type(nohost) functions for the host.
10613 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10614 return true;
10615 // Do not emit device_type(host) functions for the device.
10616 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10617 return true;
10618 return false;
10619}
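// Editorial example: given
//   #pragma omp declare target to(gpu_only) device_type(nohost)
// the host pass (IsDevice == false) returns true here, so gpu_only() is
// never emitted for the host; device_type(host) is skipped symmetrically
// on the device pass.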
10620
10621 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10622 // If emitting code for the host, we do not process FD here. Instead we do
10623 // the normal code generation.
10624 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
10625 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10626 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10627 CGM.getLangOpts().OpenMPIsTargetDevice))
10628 return true;
10629 return false;
10630 }
10631
10632 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10633 // Try to detect target regions in the function.
10634 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10635 StringRef Name = CGM.getMangledName(GD);
10636 scanForTargetRegionsFunctions(FD->getBody(), Name);
10637 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10638 CGM.getLangOpts().OpenMPIsTargetDevice))
10639 return true;
10640 }
10641
10642 // Do not emit the function if it is not marked as declare target.
10643 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10644 AlreadyEmittedTargetDecls.count(VD) == 0;
10645}
10646
10647 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10648 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10649 CGM.getLangOpts().OpenMPIsTargetDevice))
10650 return true;
10651
10652 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
10653 return false;
10654
10655 // Check if there are Ctors/Dtors in this declaration and look for target
10656 // regions in it. We use the complete variant to produce the kernel name
10657 // mangling.
10658 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10659 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10660 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10661 StringRef ParentName =
10662 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10663 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10664 }
10665 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10666 StringRef ParentName =
10667 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10668 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10669 }
10670 }
10671
10672 // Do not emit the variable if it is not marked as declare target.
10673 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10674 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10675 cast<VarDecl>(GD.getDecl()));
10676 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10677 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10678 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10679 OMPBuilder.Config.hasRequiresUnifiedSharedMemory())) {
10680 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10681 return true;
10682 }
10683 return false;
10684}
10685
10686 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10687 llvm::Constant *Addr) {
10688 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10689 !CGM.getLangOpts().OpenMPIsTargetDevice)
10690 return;
10691
10692 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10693 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10694
10695 // If this is an 'extern' declaration we defer to the canonical definition and
10696 // do not emit an offloading entry.
10697 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10698 VD->hasExternalStorage())
10699 return;
10700
10701 if (!Res) {
10702 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10703 // Register non-target variables being emitted in device code (debug info
10704 // may cause this).
10705 StringRef VarName = CGM.getMangledName(VD);
10706 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10707 }
10708 return;
10709 }
10710
10711 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10712 auto LinkageForVariable = [&VD, this]() {
10713 return CGM.getLLVMLinkageVarDefinition(VD);
10714 };
10715
10716 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10717 OMPBuilder.registerTargetGlobalVariable(
10718 convertCaptureClause(VD), convertDeviceClause(VD),
10719 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10720 VD->isExternallyVisible(),
10721 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10722 VD->getCanonicalDecl()->getBeginLoc()),
10723 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10724 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10725 CGM.getTypes().ConvertTypeForMem(
10726 CGM.getContext().getPointerType(VD->getType())),
10727 Addr);
10728
10729 for (auto *ref : GeneratedRefs)
10730 CGM.addCompilerUsedGlobal(ref);
10731}
10732
10733 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10734 if (isa<FunctionDecl>(GD.getDecl()) ||
10735 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10736 return emitTargetFunctions(GD);
10737
10738 return emitTargetGlobalVariable(GD);
10739}
10740
10741 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10742 for (const VarDecl *VD : DeferredGlobalVariables) {
10743 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10744 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10745 if (!Res)
10746 continue;
10747 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10748 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10749 !OMPBuilder.Config.hasRequiresUnifiedSharedMemory()) {
10750 CGM.EmitGlobal(VD);
10751 } else {
10752 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10753 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10754 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10755 OMPBuilder.Config.hasRequiresUnifiedSharedMemory())) &&
10756 "Expected link clause or to clause with unified memory.");
10757 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10758 }
10759 }
10760}
10761
10762 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10763 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10764 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10765 "Expected target-based directive.");
10766}
10767
10768 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10769 for (const OMPClause *Clause : D->clauselists()) {
10770 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10771 HasRequiresUnifiedSharedMemory = true;
10772 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10773 } else if (const auto *AC =
10774 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10775 switch (AC->getAtomicDefaultMemOrderKind()) {
10776 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10777 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10778 break;
10779 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10780 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10781 break;
10782 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10783 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10784 break;
10785 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10786 break;
10787 }
10788 }
10789 }
10790}
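// Editorial example: a translation unit containing
//   #pragma omp requires atomic_default_mem_order(seq_cst)
// sets RequiresAtomicOrdering to SequentiallyConsistent above, and atomic
// constructs without an explicit memory-order clause pick that default up
// via getDefaultMemoryOrdering() below.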
10791
10792llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10793 return RequiresAtomicOrdering;
10794}
10795
10796 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10797 LangAS &AS) {
10798 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10799 return false;
10800 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10801 switch (A->getAllocatorType()) {
10802 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10803 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10804 // Not supported, fallback to the default mem space.
10805 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10806 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10807 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10808 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10809 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10810 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10811 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10812 AS = LangAS::Default;
10813 return true;
10814 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10815 llvm_unreachable("Expected predefined allocator for the variables with the "
10816 "static storage.");
10817 }
10818 return false;
10819}
10820
10821 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10822 return HasRequiresUnifiedSharedMemory;
10823 }
10824
10825 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10826 CodeGenModule &CGM)
10827 : CGM(CGM) {
10828 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10829 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10830 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10831 }
10832}
10833
10834 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10835 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10836 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10837}
10838
10839 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10840 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10841 return true;
10842
10843 const auto *D = cast<FunctionDecl>(GD.getDecl());
10844 // Do not emit the function if it is marked as declare target, as it was
10845 // already emitted.
10846 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10847 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10848 if (auto *F = dyn_cast_or_null<llvm::Function>(
10849 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10850 return !F->isDeclaration();
10851 return false;
10852 }
10853 return true;
10854 }
10855
10856 return !AlreadyEmittedTargetDecls.insert(D).second;
10857}
10858
10859 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10860 const OMPExecutableDirective &D,
10861 SourceLocation Loc,
10862 llvm::Function *OutlinedFn,
10863 ArrayRef<llvm::Value *> CapturedVars) {
10864 if (!CGF.HaveInsertPoint())
10865 return;
10866
10867 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10868 CodeGenFunction::RunCleanupsScope Scope(CGF);
10869
10870 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10871 llvm::Value *Args[] = {
10872 RTLoc,
10873 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10874 OutlinedFn};
10875 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10876 RealArgs.append(std::begin(Args), std::end(Args));
10877 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10878
10879 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10880 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10881 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10882}
10883
10884 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10885 const Expr *NumTeams,
10886 const Expr *ThreadLimit,
10887 SourceLocation Loc) {
10888 if (!CGF.HaveInsertPoint())
10889 return;
10890
10891 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10892
10893 llvm::Value *NumTeamsVal =
10894 NumTeams
10895 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10896 CGF.CGM.Int32Ty, /* isSigned = */ true)
10897 : CGF.Builder.getInt32(0);
10898
10899 llvm::Value *ThreadLimitVal =
10900 ThreadLimit
10901 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10902 CGF.CGM.Int32Ty, /* isSigned = */ true)
10903 : CGF.Builder.getInt32(0);
10904
10905 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10906 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10907 ThreadLimitVal};
10908 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10909 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10910 PushNumTeamsArgs);
10911}
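// Editorial example: `#pragma omp teams num_teams(4) thread_limit(8)`
// lowers to
//   call void @__kmpc_push_num_teams(ptr @loc, i32 %gtid, i32 4, i32 8)
// immediately before the __kmpc_fork_teams call emitted by emitTeamsCall.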
10912
10913 void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10914 const Expr *ThreadLimit,
10915 SourceLocation Loc) {
10916 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10917 llvm::Value *ThreadLimitVal =
10918 ThreadLimit
10919 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10920 CGF.CGM.Int32Ty, /* isSigned = */ true)
10921 : CGF.Builder.getInt32(0);
10922
10923 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10924 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10925 ThreadLimitVal};
10926 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10927 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10928 ThreadLimitArgs);
10929}
10930
10931 void CGOpenMPRuntime::emitTargetDataCalls(
10932 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10933 const Expr *Device, const RegionCodeGenTy &CodeGen,
10934 CGOpenMPRuntime::TargetDataInfo &Info) {
10935 if (!CGF.HaveInsertPoint())
10936 return;
10937
10938 // Action used to replace the default codegen action and turn privatization
10939 // off.
10940 PrePostActionTy NoPrivAction;
10941
10942 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10943
10944 llvm::Value *IfCondVal = nullptr;
10945 if (IfCond)
10946 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10947
10948 // Emit device ID if any.
10949 llvm::Value *DeviceID = nullptr;
10950 if (Device) {
10951 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10952 CGF.Int64Ty, /*isSigned=*/true);
10953 } else {
10954 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10955 }
10956
10957 // Fill up the arrays with all the mapped variables.
10958 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10959 auto GenMapInfoCB =
10960 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10961 CGF.Builder.restoreIP(CodeGenIP);
10962 // Get map clause information.
10963 MappableExprsHandler MEHandler(D, CGF);
10964 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10965
10966 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10967 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10968 };
10969 if (CGM.getCodeGenOpts().getDebugInfo() !=
10970 llvm::codegenoptions::NoDebugInfo) {
10971 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10972 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10973 FillInfoMap);
10974 }
10975
10976 return CombinedInfo;
10977 };
10978 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10979 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10980 CGF.Builder.restoreIP(CodeGenIP);
10981 switch (BodyGenType) {
10982 case BodyGenTy::Priv:
10983 if (!Info.CaptureDeviceAddrMap.empty())
10984 CodeGen(CGF);
10985 break;
10986 case BodyGenTy::DupNoPriv:
10987 if (!Info.CaptureDeviceAddrMap.empty()) {
10988 CodeGen.setAction(NoPrivAction);
10989 CodeGen(CGF);
10990 }
10991 break;
10992 case BodyGenTy::NoPriv:
10993 if (Info.CaptureDeviceAddrMap.empty()) {
10994 CodeGen.setAction(NoPrivAction);
10995 CodeGen(CGF);
10996 }
10997 break;
10998 }
10999 return InsertPointTy(CGF.Builder.GetInsertBlock(),
11000 CGF.Builder.GetInsertPoint());
11001 };
11002
11003 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
11004 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
11005 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
11006 }
11007 };
11008
11009 auto CustomMapperCB = [&](unsigned int I) {
11010 llvm::Function *MFunc = nullptr;
11011 if (CombinedInfo.Mappers[I]) {
11012 Info.HasMapper = true;
11013 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
11014 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
11015 }
11016 return MFunc;
11017 };
11018
11019 // Source location for the ident struct
11020 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11021
11022 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
11023 CGF.AllocaInsertPt->getIterator());
11024 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
11025 CGF.Builder.GetInsertPoint());
11026 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
11027 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
11028 cantFail(OMPBuilder.createTargetData(
11029 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
11030 CustomMapperCB,
11031 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
11032 CGF.Builder.restoreIP(AfterIP);
11033}
11034
11035 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11036 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11037 const Expr *Device) {
11038 if (!CGF.HaveInsertPoint())
11039 return;
11040
11041 assert((isa<OMPTargetEnterDataDirective>(D) ||
11042 isa<OMPTargetExitDataDirective>(D) ||
11043 isa<OMPTargetUpdateDirective>(D)) &&
11044 "Expecting either target enter, exit data, or update directives.");
11045
11046 CodeGenFunction::OMPTargetDataInfo InputInfo;
11047 llvm::Value *MapTypesArray = nullptr;
11048 llvm::Value *MapNamesArray = nullptr;
11049 // Generate the code for the opening of the data environment.
11050 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11051 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11052 // Emit device ID if any.
11053 llvm::Value *DeviceID = nullptr;
11054 if (Device) {
11055 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11056 CGF.Int64Ty, /*isSigned=*/true);
11057 } else {
11058 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11059 }
11060
11061 // Emit the number of elements in the offloading arrays.
11062 llvm::Constant *PointerNum =
11063 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11064
11065 // Source location for the ident struct
11066 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11067
11068 SmallVector<llvm::Value *, 13> OffloadingArgs(
11069 {RTLoc, DeviceID, PointerNum,
11070 InputInfo.BasePointersArray.emitRawPointer(CGF),
11071 InputInfo.PointersArray.emitRawPointer(CGF),
11072 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
11073 InputInfo.MappersArray.emitRawPointer(CGF)});
11074
11075 // Select the right runtime function call for each standalone
11076 // directive.
11077 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11078 RuntimeFunction RTLFn;
11079 switch (D.getDirectiveKind()) {
11080 case OMPD_target_enter_data:
11081 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11082 : OMPRTL___tgt_target_data_begin_mapper;
11083 break;
11084 case OMPD_target_exit_data:
11085 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11086 : OMPRTL___tgt_target_data_end_mapper;
11087 break;
11088 case OMPD_target_update:
11089 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11090 : OMPRTL___tgt_target_data_update_mapper;
11091 break;
11092 case OMPD_parallel:
11093 case OMPD_for:
11094 case OMPD_parallel_for:
11095 case OMPD_parallel_master:
11096 case OMPD_parallel_sections:
11097 case OMPD_for_simd:
11098 case OMPD_parallel_for_simd:
11099 case OMPD_cancel:
11100 case OMPD_cancellation_point:
11101 case OMPD_ordered:
11102 case OMPD_threadprivate:
11103 case OMPD_allocate:
11104 case OMPD_task:
11105 case OMPD_simd:
11106 case OMPD_tile:
11107 case OMPD_unroll:
11108 case OMPD_sections:
11109 case OMPD_section:
11110 case OMPD_single:
11111 case OMPD_master:
11112 case OMPD_critical:
11113 case OMPD_taskyield:
11114 case OMPD_barrier:
11115 case OMPD_taskwait:
11116 case OMPD_taskgroup:
11117 case OMPD_atomic:
11118 case OMPD_flush:
11119 case OMPD_depobj:
11120 case OMPD_scan:
11121 case OMPD_teams:
11122 case OMPD_target_data:
11123 case OMPD_distribute:
11124 case OMPD_distribute_simd:
11125 case OMPD_distribute_parallel_for:
11126 case OMPD_distribute_parallel_for_simd:
11127 case OMPD_teams_distribute:
11128 case OMPD_teams_distribute_simd:
11129 case OMPD_teams_distribute_parallel_for:
11130 case OMPD_teams_distribute_parallel_for_simd:
11131 case OMPD_declare_simd:
11132 case OMPD_declare_variant:
11133 case OMPD_begin_declare_variant:
11134 case OMPD_end_declare_variant:
11135 case OMPD_declare_target:
11136 case OMPD_end_declare_target:
11137 case OMPD_declare_reduction:
11138 case OMPD_declare_mapper:
11139 case OMPD_taskloop:
11140 case OMPD_taskloop_simd:
11141 case OMPD_master_taskloop:
11142 case OMPD_master_taskloop_simd:
11143 case OMPD_parallel_master_taskloop:
11144 case OMPD_parallel_master_taskloop_simd:
11145 case OMPD_target:
11146 case OMPD_target_simd:
11147 case OMPD_target_teams_distribute:
11148 case OMPD_target_teams_distribute_simd:
11149 case OMPD_target_teams_distribute_parallel_for:
11150 case OMPD_target_teams_distribute_parallel_for_simd:
11151 case OMPD_target_teams:
11152 case OMPD_target_parallel:
11153 case OMPD_target_parallel_for:
11154 case OMPD_target_parallel_for_simd:
11155 case OMPD_requires:
11156 case OMPD_metadirective:
11157 case OMPD_unknown:
11158 default:
11159 llvm_unreachable("Unexpected standalone target data directive.");
11160 break;
11161 }
11162 if (HasNowait) {
11163 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11164 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11165 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11166 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11167 }
11168 CGF.EmitRuntimeCall(
11169 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11170 OffloadingArgs);
11171 };
11172
11173 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11174 &MapNamesArray](CodeGenFunction &CGF,
11175 PrePostActionTy &) {
11176 // Fill up the arrays with all the mapped variables.
11177 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11178 llvm::OpenMPIRBuilder::TargetDataInfo Info;
11179 MappableExprsHandler MEHandler(D, CGF);
11180 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11181 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11182 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11183
11184 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11185 D.hasClausesOfKind<OMPNowaitClause>();
11186
11187 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11188 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11189 CGF.VoidPtrTy, CGM.getPointerAlign());
11190 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11191 CGM.getPointerAlign());
11192 InputInfo.SizesArray =
11193 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11194 InputInfo.MappersArray =
11195 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11196 MapTypesArray = Info.RTArgs.MapTypesArray;
11197 MapNamesArray = Info.RTArgs.MapNamesArray;
11198 if (RequiresOuterTask)
11199 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11200 else
11201 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11202 };
11203
11204 if (IfCond) {
11205 emitIfClause(CGF, IfCond, TargetThenGen,
11206 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11207 } else {
11208 RegionCodeGenTy ThenRCG(TargetThenGen);
11209 ThenRCG(CGF);
11210 }
11211}
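// Editorial example: `#pragma omp target update to(x) nowait` selects
// __tgt_target_data_update_nowait_mapper in the switch above and appends
// the four null depend-list arguments pushed for the nowait case.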
11212
11213namespace {
11214 /// Kind of parameter in a function with 'declare simd' directive.
11215enum ParamKindTy {
11216 Linear,
11217 LinearRef,
11218 LinearUVal,
11219 LinearVal,
11220 Uniform,
11221 Vector,
11222};
11223/// Attribute set of the parameter.
11224struct ParamAttrTy {
11225 ParamKindTy Kind = Vector;
11226 llvm::APSInt StrideOrArg;
11227 llvm::APSInt Alignment;
11228 bool HasVarStride = false;
11229};
11230} // namespace
11231
11232static unsigned evaluateCDTSize(const FunctionDecl *FD,
11233 ArrayRef<ParamAttrTy> ParamAttrs) {
11234 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11235 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11236 // of that clause. The VLEN value must be a power of 2.
11237 // Otherwise, the notion of the function's "characteristic data type" (CDT)
11238 // is used to compute the vector length.
11239 // CDT is defined in the following order:
11240 // a) For non-void function, the CDT is the return type.
11241 // b) If the function has any non-uniform, non-linear parameters, then the
11242 // CDT is the type of the first such parameter.
11243 // c) If the CDT determined by a) or b) above is struct, union, or class
11244 // type which is pass-by-value (except for the type that maps to the
11245 // built-in complex data type), the characteristic data type is int.
11246 // d) If none of the above three cases is applicable, the CDT is int.
11247 // The VLEN is then determined based on the CDT and the size of vector
11248 // register of that ISA for which current vector version is generated. The
11249 // VLEN is computed using the formula below:
11250 // VLEN = sizeof(vector_register) / sizeof(CDT),
11251 // where the vector register size is specified in section 3.2.1 Registers and the
11252 // Stack Frame of original AMD64 ABI document.
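// Editorial worked example: for `#pragma omp declare simd` with no simdlen
// on
//   double foo(double x);
// the CDT is double (case a), so a 256-bit (AVX2-class) vector variant
// gets VLEN = 256 / 64 = 4.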
11253 QualType RetType = FD->getReturnType();
11254 if (RetType.isNull())
11255 return 0;
11256 ASTContext &C = FD->getASTContext();
11257 QualType CDT;
11258 if (!RetType.isNull() && !RetType->isVoidType()) {
11259 CDT = RetType;
11260 } else {
11261 unsigned Offset = 0;
11262 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11263 if (ParamAttrs[Offset].Kind == Vector)
11264 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
11265 ++Offset;
11266 }
11267 if (CDT.isNull()) {
11268 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11269 if (ParamAttrs[I + Offset].Kind == Vector) {
11270 CDT = FD->getParamDecl(I)->getType();
11271 break;
11272 }
11273 }
11274 }
11275 }
11276 if (CDT.isNull())
11277 CDT = C.IntTy;
11278 CDT = CDT->getCanonicalTypeUnqualified();
11279 if (CDT->isRecordType() || CDT->isUnionType())
11280 CDT = C.IntTy;
11281 return C.getTypeSize(CDT);
11282}
11283
11284/// Mangle the parameter part of the vector function name according to
11285/// their OpenMP classification. The mangling function is defined in
11286/// section 4.5 of the AAVFABI(2021Q1).
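/// Editorial example: parameters classified as (uniform, linear with step
/// 2, vector aligned to 32 bytes) mangle as "ul2va32"; a unit linear step
/// is omitted, a negative step prints as 'n' plus its magnitude, and a
/// variable stride prints as 's' plus the parameter position.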
11287static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11288 SmallString<256> Buffer;
11289 llvm::raw_svector_ostream Out(Buffer);
11290 for (const auto &ParamAttr : ParamAttrs) {
11291 switch (ParamAttr.Kind) {
11292 case Linear:
11293 Out << 'l';
11294 break;
11295 case LinearRef:
11296 Out << 'R';
11297 break;
11298 case LinearUVal:
11299 Out << 'U';
11300 break;
11301 case LinearVal:
11302 Out << 'L';
11303 break;
11304 case Uniform:
11305 Out << 'u';
11306 break;
11307 case Vector:
11308 Out << 'v';
11309 break;
11310 }
11311 if (ParamAttr.HasVarStride)
11312 Out << "s" << ParamAttr.StrideOrArg;
11313 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11314 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11315 // Don't print the step value if it is not present or if it is
11316 // equal to 1.
11317 if (ParamAttr.StrideOrArg < 0)
11318 Out << 'n' << -ParamAttr.StrideOrArg;
11319 else if (ParamAttr.StrideOrArg != 1)
11320 Out << ParamAttr.StrideOrArg;
11321 }
11322
11323 if (!!ParamAttr.Alignment)
11324 Out << 'a' << ParamAttr.Alignment;
11325 }
11326
11327 return std::string(Out.str());
11328}
11329
11330static void
11331emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11332 const llvm::APSInt &VLENVal,
11333 ArrayRef<ParamAttrTy> ParamAttrs,
11334 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11335 struct ISADataTy {
11336 char ISA;
11337 unsigned VecRegSize;
11338 };
11339 ISADataTy ISAData[] = {
11340 {
11341 'b', 128
11342 }, // SSE
11343 {
11344 'c', 256
11345 }, // AVX
11346 {
11347 'd', 256
11348 }, // AVX2
11349 {
11350 'e', 512
11351 }, // AVX512
11352 };
11354 switch (State) {
11355 case OMPDeclareSimdDeclAttr::BS_Undefined:
11356 Masked.push_back('N');
11357 Masked.push_back('M');
11358 break;
11359 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11360 Masked.push_back('N');
11361 break;
11362 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11363 Masked.push_back('M');
11364 break;
11365 }
11366 for (char Mask : Masked) {
11367 for (const ISADataTy &Data : ISAData) {
11368 SmallString<256> Buffer;
11369 llvm::raw_svector_ostream Out(Buffer);
11370 Out << "_ZGV" << Data.ISA << Mask;
11371 if (!VLENVal) {
11372 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11373 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11374 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11375 } else {
11376 Out << VLENVal;
11377 }
11378 Out << mangleVectorParameters(ParamAttrs);
11379 Out << '_' << Fn->getName();
11380 Fn->addFnAttr(Out.str());
11381 }
11382 }
11383}
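// Editorial example: `#pragma omp declare simd notinbranch` with no
// simdlen on
//   extern "C" double foo(double x);
// uses VLEN = 128 / 64 = 2 for the SSE entry, attaching names such as
// "_ZGVbN2v_foo" (plus the AVX, AVX2, and AVX512 variants).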
11384
11385// These are the functions needed to mangle the names of the
11386// vector functions generated by the compiler, according to the rules
11387// defined in the "Vector Function ABI specifications for AArch64",
11388// available at
11389// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11390
11391/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11392static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11393 QT = QT.getCanonicalType();
11394
11395 if (QT->isVoidType())
11396 return false;
11397
11398 if (Kind == ParamKindTy::Uniform)
11399 return false;
11400
11401 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
11402 return false;
11403
11404 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11405 !QT->isReferenceType())
11406 return false;
11407
11408 return true;
11409}
11410
11411/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11412 static bool getAArch64PBV(QualType QT, ASTContext &C) {
11413 QT = QT.getCanonicalType();
11414 unsigned Size = C.getTypeSize(QT);
11415
11416 // Only scalars and complex types within 16 bytes wide set PBV to true.
11417 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11418 return false;
11419
11420 if (QT->isFloatingType())
11421 return true;
11422
11423 if (QT->isIntegerType())
11424 return true;
11425
11426 if (QT->isPointerType())
11427 return true;
11428
11429 // TODO: Add support for complex types (section 3.1.2, item 2).
11430
11431 return false;
11432}
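// Editorial example: `double` (64-bit floating) and `int *` (a 64-bit
// pointer) are PBV; a 256-bit struct passed by value is not, since only
// the widths checked above (8..128 bits) qualify.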
11433
11434/// Computes the lane size (LS) of a return type or of an input parameter,
11435/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11436/// TODO: Add support for references, section 3.2.1, item 1.
11437static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11438 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11439 QualType PTy = QT.getCanonicalType()->getPointeeType();
11440 if (getAArch64PBV(PTy, C))
11441 return C.getTypeSize(PTy);
11442 }
11443 if (getAArch64PBV(QT, C))
11444 return C.getTypeSize(QT);
11445
11446 return C.getTypeSize(C.getUIntPtrType());
11447}
11448
11449// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11450// signature of the scalar function, as defined in 3.2.2 of the
11451// AAVFABI.
11452static std::tuple<unsigned, unsigned, bool>
11453 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11454 QualType RetType = FD->getReturnType().getCanonicalType();
11455
11456 ASTContext &C = FD->getASTContext();
11457
11458 bool OutputBecomesInput = false;
11459
11460 llvm::SmallVector<unsigned, 8> Sizes;
11461 if (!RetType->isVoidType()) {
11462 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11463 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11464 OutputBecomesInput = true;
11465 }
11466 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11467 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11468 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11469 }
11470
11471 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11472 // The LS of a function parameter / return value can only be a power
11473 // of 2, starting from 8 bits, up to 128.
11474 assert(llvm::all_of(Sizes,
11475 [](unsigned Size) {
11476 return Size == 8 || Size == 16 || Size == 32 ||
11477 Size == 64 || Size == 128;
11478 }) &&
11479 "Invalid size");
11480
11481 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
11482 OutputBecomesInput);
11483}
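// Editorial worked example: for `int32_t foo(int32_t a, double b)` with
// both parameters classified as vector, the lane sizes are {32, 32, 64},
// so NDS = 32 and WDS = 64.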
11484
11485// Function used to add the attribute. The parameter `VLEN` is
11486// templated to allow the use of "x" when targeting scalable functions
11487// for SVE.
11488template <typename T>
11489static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11490 char ISA, StringRef ParSeq,
11491 StringRef MangledName, bool OutputBecomesInput,
11492 llvm::Function *Fn) {
11493 SmallString<256> Buffer;
11494 llvm::raw_svector_ostream Out(Buffer);
11495 Out << Prefix << ISA << LMask << VLEN;
11496 if (OutputBecomesInput)
11497 Out << "v";
11498 Out << ParSeq << "_" << MangledName;
11499 Fn->addFnAttr(Out.str());
11500}
11501
11502// Helper function to generate the Advanced SIMD names depending on
11503// the value of the NDS when simdlen is not present.
11504static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11505 StringRef Prefix, char ISA,
11506 StringRef ParSeq, StringRef MangledName,
11507 bool OutputBecomesInput,
11508 llvm::Function *Fn) {
11509 switch (NDS) {
11510 case 8:
11511 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11512 OutputBecomesInput, Fn);
11513 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11514 OutputBecomesInput, Fn);
11515 break;
11516 case 16:
11517 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11518 OutputBecomesInput, Fn);
11519 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11520 OutputBecomesInput, Fn);
11521 break;
11522 case 32:
11523 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11524 OutputBecomesInput, Fn);
11525 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11526 OutputBecomesInput, Fn);
11527 break;
11528 case 64:
11529 case 128:
11530 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11531 OutputBecomesInput, Fn);
11532 break;
11533 default:
11534 llvm_unreachable("Scalar type is too wide.");
11535 }
11536}
11537
11538/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11539 static void emitAArch64DeclareSimdFunction(
11540 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11541 ArrayRef<ParamAttrTy> ParamAttrs,
11542 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11543 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11544
11545 // Get basic data for building the vector signature.
11546 const auto Data = getNDSWDS(FD, ParamAttrs);
11547 const unsigned NDS = std::get<0>(Data);
11548 const unsigned WDS = std::get<1>(Data);
11549 const bool OutputBecomesInput = std::get<2>(Data);
11550
11551 // Check the values provided via `simdlen` by the user.
11552 // 1. A `simdlen(1)` doesn't produce vector signatures.
11553 if (UserVLEN == 1) {
11554 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11555 DiagnosticsEngine::Warning,
11556 "The clause simdlen(1) has no effect when targeting aarch64.");
11557 CGM.getDiags().Report(SLoc, DiagID);
11558 return;
11559 }
11560
11561 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11562 // Advanced SIMD output.
11563 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11564 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11565 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11566 "power of 2 when targeting Advanced SIMD.");
11567 CGM.getDiags().Report(SLoc, DiagID);
11568 return;
11569 }
11570
11571 // 3. Section 3.4.1. SVE fixed length must obey the architectural
11572 // limits.
11573 if (ISA == 's' && UserVLEN != 0) {
11574 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11575 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11576 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11577 "lanes in the architectural constraints "
11578 "for SVE (min is 128-bit, max is "
11579 "2048-bit, by steps of 128-bit)");
11580 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11581 return;
11582 }
11583 }
11584
11585 // Sort out parameter sequence.
11586 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11587 StringRef Prefix = "_ZGV";
11588 // Generate simdlen from user input (if any).
11589 if (UserVLEN) {
11590 if (ISA == 's') {
11591 // SVE generates only a masked function.
11592 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11593 OutputBecomesInput, Fn);
11594 } else {
11595 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11596 // Advanced SIMD generates one or two functions, depending on
11597 // the `[not]inbranch` clause.
11598 switch (State) {
11599 case OMPDeclareSimdDeclAttr::BS_Undefined:
11600 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11601 OutputBecomesInput, Fn);
11602 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11603 OutputBecomesInput, Fn);
11604 break;
11605 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11606 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11607 OutputBecomesInput, Fn);
11608 break;
11609 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11610 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11611 OutputBecomesInput, Fn);
11612 break;
11613 }
11614 }
11615 } else {
11616 // If no user simdlen is provided, follow the AAVFABI rules for
11617 // generating the vector length.
11618 if (ISA == 's') {
11619 // SVE, section 3.4.1, item 1.
11620 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11621 OutputBecomesInput, Fn);
11622 } else {
11623 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11624 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11625 // two vector names depending on the use of the clause
11626 // `[not]inbranch`.
11627 switch (State) {
11628 case OMPDeclareSimdDeclAttr::BS_Undefined:
11629 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11630 OutputBecomesInput, Fn);
11631 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11632 OutputBecomesInput, Fn);
11633 break;
11634 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11635 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11636 OutputBecomesInput, Fn);
11637 break;
11638 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11639 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11640 OutputBecomesInput, Fn);
11641 break;
11642 }
11643 }
11644 }
11645}
11646
11647 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11648 llvm::Function *Fn) {
11649 ASTContext &C = CGM.getContext();
11650 FD = FD->getMostRecentDecl();
11651 while (FD) {
11652 // Map params to their positions in function decl.
11653 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11654 if (isa<CXXMethodDecl>(FD))
11655 ParamPositions.try_emplace(FD, 0);
11656 unsigned ParamPos = ParamPositions.size();
11657 for (const ParmVarDecl *P : FD->parameters()) {
11658 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11659 ++ParamPos;
11660 }
11661 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11662 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11663 // Mark uniform parameters.
11664 for (const Expr *E : Attr->uniforms()) {
11665 E = E->IgnoreParenImpCasts();
11666 unsigned Pos;
11667 if (isa<CXXThisExpr>(E)) {
11668 Pos = ParamPositions[FD];
11669 } else {
11670 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11671 ->getCanonicalDecl();
11672 auto It = ParamPositions.find(PVD);
11673 assert(It != ParamPositions.end() && "Function parameter not found");
11674 Pos = It->second;
11675 }
11676 ParamAttrs[Pos].Kind = Uniform;
11677 }
11678 // Get alignment info.
11679 auto *NI = Attr->alignments_begin();
11680 for (const Expr *E : Attr->aligneds()) {
11681 E = E->IgnoreParenImpCasts();
11682 unsigned Pos;
11683 QualType ParmTy;
11684 if (isa<CXXThisExpr>(E)) {
11685 Pos = ParamPositions[FD];
11686 ParmTy = E->getType();
11687 } else {
11688 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11689 ->getCanonicalDecl();
11690 auto It = ParamPositions.find(PVD);
11691 assert(It != ParamPositions.end() && "Function parameter not found");
11692 Pos = It->second;
11693 ParmTy = PVD->getType();
11694 }
11695 ParamAttrs[Pos].Alignment =
11696 (*NI)
11697 ? (*NI)->EvaluateKnownConstInt(C)
11698 : llvm::APSInt::getUnsigned(
11699 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11700 .getQuantity());
11701 ++NI;
11702 }
11703 // Mark linear parameters.
11704 auto *SI = Attr->steps_begin();
11705 auto *MI = Attr->modifiers_begin();
11706 for (const Expr *E : Attr->linears()) {
11707 E = E->IgnoreParenImpCasts();
11708 unsigned Pos;
11709 bool IsReferenceType = false;
11710 // Rescaling factor needed to compute the linear parameter
11711 // value in the mangled name.
11712 unsigned PtrRescalingFactor = 1;
11713 if (isa<CXXThisExpr>(E)) {
11714 Pos = ParamPositions[FD];
11715 auto *P = cast<PointerType>(E->getType());
11716 PtrRescalingFactor = CGM.getContext()
11717 .getTypeSizeInChars(P->getPointeeType())
11718 .getQuantity();
11719 } else {
11720 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11721 ->getCanonicalDecl();
11722 auto It = ParamPositions.find(PVD);
11723 assert(It != ParamPositions.end() && "Function parameter not found");
11724 Pos = It->second;
11725 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11726 PtrRescalingFactor = CGM.getContext()
11727 .getTypeSizeInChars(P->getPointeeType())
11728 .getQuantity();
11729 else if (PVD->getType()->isReferenceType()) {
11730 IsReferenceType = true;
11731 PtrRescalingFactor =
11732 CGM.getContext()
11733 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11734 .getQuantity();
11735 }
11736 }
11737 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11738 if (*MI == OMPC_LINEAR_ref)
11739 ParamAttr.Kind = LinearRef;
11740 else if (*MI == OMPC_LINEAR_uval)
11741 ParamAttr.Kind = LinearUVal;
11742 else if (IsReferenceType)
11743 ParamAttr.Kind = LinearVal;
11744 else
11745 ParamAttr.Kind = Linear;
11746 // Assuming a stride of 1, for `linear` without modifiers.
11747 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11748 if (*SI) {
11749 Expr::EvalResult Result;
11750 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11751 if (const auto *DRE =
11752 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11753 if (const auto *StridePVD =
11754 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11755 ParamAttr.HasVarStride = true;
11756 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11757 assert(It != ParamPositions.end() &&
11758 "Function parameter not found");
11759 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11760 }
11761 }
11762 } else {
11763 ParamAttr.StrideOrArg = Result.Val.getInt();
11764 }
11765 }
11766 // If we are using a linear clause on a pointer, we need to
11767 // rescale the value of linear_step with the byte size of the
11768 // pointee type.
11769 if (!ParamAttr.HasVarStride &&
11770 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11771 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11772 ++SI;
11773 ++MI;
11774 }
11775 llvm::APSInt VLENVal;
11776 SourceLocation ExprLoc;
11777 const Expr *VLENExpr = Attr->getSimdlen();
11778 if (VLENExpr) {
11779 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11780 ExprLoc = VLENExpr->getExprLoc();
11781 }
11782 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11783 if (CGM.getTriple().isX86()) {
11784 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11785 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11786 unsigned VLEN = VLENVal.getExtValue();
11787 StringRef MangledName = Fn->getName();
11788 if (CGM.getTarget().hasFeature("sve"))
11789 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11790 MangledName, 's', 128, Fn, ExprLoc);
11791 else if (CGM.getTarget().hasFeature("neon"))
11792 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11793 MangledName, 'n', 128, Fn, ExprLoc);
11794 }
11795 }
11796 FD = FD->getPreviousDecl();
11797 }
11798}
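// Editorial end-to-end example: on x86,
//   #pragma omp declare simd simdlen(4) uniform(p) linear(i)
//   extern "C" double get(double *p, int i);
// classifies p as 'u' and i as 'l' (the unit stride is not printed), so
// the loop above attaches "_ZGVbN4ul_get", "_ZGVbM4ul_get", and the
// corresponding AVX/AVX2/AVX512 variants via emitX86DeclareSimdFunction.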
11799
11800namespace {
11801/// Cleanup action for doacross support.
11802class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11803public:
11804 static const int DoacrossFinArgs = 2;
11805
11806private:
11807 llvm::FunctionCallee RTLFn;
11808 llvm::Value *Args[DoacrossFinArgs];
11809
11810public:
11811 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11812 ArrayRef<llvm::Value *> CallArgs)
11813 : RTLFn(RTLFn) {
11814 assert(CallArgs.size() == DoacrossFinArgs);
11815 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11816 }
11817 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11818 if (!CGF.HaveInsertPoint())
11819 return;
11820 CGF.EmitRuntimeCall(RTLFn, Args);
11821 }
11822};
11823} // namespace
11824
11825 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11826 const OMPLoopDirective &D,
11827 ArrayRef<Expr *> NumIterations) {
11828 if (!CGF.HaveInsertPoint())
11829 return;
11830
11831 ASTContext &C = CGM.getContext();
11832 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11833 RecordDecl *RD;
11834 if (KmpDimTy.isNull()) {
11835 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11836 // kmp_int64 lo; // lower
11837 // kmp_int64 up; // upper
11838 // kmp_int64 st; // stride
11839 // };
11840 RD = C.buildImplicitRecord("kmp_dim");
11841 RD->startDefinition();
11842 addFieldToRecordDecl(C, RD, Int64Ty);
11843 addFieldToRecordDecl(C, RD, Int64Ty);
11844 addFieldToRecordDecl(C, RD, Int64Ty);
11845 RD->completeDefinition();
11846 KmpDimTy = C.getCanonicalTagType(RD);
11847 } else {
11848 RD = KmpDimTy->castAsRecordDecl();
11849 }
11850 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11851 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11852 ArraySizeModifier::Normal, 0);
11853
11854 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11855 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11856 enum { LowerFD = 0, UpperFD, StrideFD };
11857 // Fill dims with data.
11858 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11859 LValue DimsLVal = CGF.MakeAddrLValue(
11860 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11861 // dims.upper = num_iterations;
11862 LValue UpperLVal = CGF.EmitLValueForField(
11863 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11864 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11865 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11866 Int64Ty, NumIterations[I]->getExprLoc());
11867 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11868 // dims.stride = 1;
11869 LValue StrideLVal = CGF.EmitLValueForField(
11870 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11871 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11872 StrideLVal);
11873 }
11874
11875 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11876 // kmp_int32 num_dims, struct kmp_dim * dims);
11877 llvm::Value *Args[] = {
11878 emitUpdateLocation(CGF, D.getBeginLoc()),
11879 getThreadID(CGF, D.getBeginLoc()),
11880 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11881 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11882 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11883 CGM.VoidPtrTy)};
11884
11885 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11886 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11887 CGF.EmitRuntimeCall(RTLFn, Args);
11888 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11889 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11890 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11891 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11892 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11893 llvm::ArrayRef(FiniArgs));
11894}
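// Illustrative source (not part of this file): a doacross loop nest such as
//
//   #pragma omp for ordered(2)
//   for (int i = 1; i < n; ++i)
//     for (int j = 1; j < m; ++j) {
//       #pragma omp ordered depend(sink : i - 1, j) depend(sink : i, j - 1)
//       a[i][j] = a[i - 1][j] + a[i][j - 1];
//       #pragma omp ordered depend(source)
//     }
//
// reaches emitDoacrossInit with NumIterations.size() == 2: the 'dims' array
// gets one kmp_dim entry per associated loop, __kmpc_doacross_init is called
// once, and the cleanup pushed above emits the matching __kmpc_doacross_fini.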
11895
11896template <typename T>
11897static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11898 const T *C, llvm::Value *ULoc,
11899 llvm::Value *ThreadID) {
11900 QualType Int64Ty =
11901 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11902 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11903 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11904 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11905 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11906 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11907 const Expr *CounterVal = C->getLoopData(I);
11908 assert(CounterVal);
11909 llvm::Value *CntVal = CGF.EmitScalarConversion(
11910 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11911 CounterVal->getExprLoc());
11912 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11913 /*Volatile=*/false, Int64Ty);
11914 }
11915 llvm::Value *Args[] = {
11916 ULoc, ThreadID,
11917 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11918 llvm::FunctionCallee RTLFn;
11919 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11920 OMPDoacrossKind<T> ODK;
11921 if (ODK.isSource(C)) {
11922 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11923 OMPRTL___kmpc_doacross_post);
11924 } else {
11925 assert(ODK.isSink(C) && "Expect sink modifier.");
11926 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11927 OMPRTL___kmpc_doacross_wait);
11928 }
11929 CGF.EmitRuntimeCall(RTLFn, Args);
11930}
11931
11932void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11933 const OMPDependClause *C) {
11934 EmitDoacrossOrdered<OMPDependClause>(
11935 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11936 getThreadID(CGF, C->getBeginLoc()));
11937}
11938
11939void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11940 const OMPDoacrossClause *C) {
11941 EmitDoacrossOrdered<OMPDoacrossClause>(
11942 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11943 getThreadID(CGF, C->getBeginLoc()));
11944}
11945
11946void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11947 llvm::FunctionCallee Callee,
11948 ArrayRef<llvm::Value *> Args) const {
11949 assert(Loc.isValid() && "Outlined function call location must be valid.");
11950 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11951
11952 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11953 if (Fn->doesNotThrow()) {
11954 CGF.EmitNounwindRuntimeCall(Fn, Args);
11955 return;
11956 }
11957 }
11958 CGF.EmitRuntimeCall(Callee, Args);
11959}
11960
11961void CGOpenMPRuntime::emitOutlinedFunctionCall(
11962 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11963 ArrayRef<llvm::Value *> Args) const {
11964 emitCall(CGF, Loc, OutlinedFn, Args);
11965}
11966
11967void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11968 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11969 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11970 HasEmittedDeclareTargetRegion = true;
11971}
11972
11973Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11974 const VarDecl *NativeParam,
11975 const VarDecl *TargetParam) const {
11976 return CGF.GetAddrOfLocalVar(NativeParam);
11977}
11978
11979/// Return allocator value from expression, or return a null allocator (default
11980/// when no allocator specified).
11981static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11982 const Expr *Allocator) {
11983 llvm::Value *AllocVal;
11984 if (Allocator) {
11985 AllocVal = CGF.EmitScalarExpr(Allocator);
11986 // According to the standard, the original allocator type is an enum
11987 // (integer). Convert to pointer type, if required.
11988 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11989 CGF.getContext().VoidPtrTy,
11990 Allocator->getExprLoc());
11991 } else {
11992 // If no allocator specified, it defaults to the null allocator.
11993 AllocVal = llvm::Constant::getNullValue(
11994 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11995 }
11996 return AllocVal;
11997}
11998
11999/// Return the alignment from an allocate directive if present.
12000static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12001 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12002
12003 if (!AllocateAlignment)
12004 return nullptr;
12005
12006 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
12007}
12008
12009Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12010 const VarDecl *VD) {
12011 if (!VD)
12012 return Address::invalid();
12013 Address UntiedAddr = Address::invalid();
12014 Address UntiedRealAddr = Address::invalid();
12015 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12016 if (It != FunctionToUntiedTaskStackMap.end()) {
12017 const UntiedLocalVarsAddressesMap &UntiedData =
12018 UntiedLocalVarsStack[It->second];
12019 auto I = UntiedData.find(VD);
12020 if (I != UntiedData.end()) {
12021 UntiedAddr = I->second.first;
12022 UntiedRealAddr = I->second.second;
12023 }
12024 }
12025 const VarDecl *CVD = VD->getCanonicalDecl();
12026 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12027 // Use the default allocation.
12028 if (!isAllocatableDecl(VD))
12029 return UntiedAddr;
12030 llvm::Value *Size;
12031 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12032 if (CVD->getType()->isVariablyModifiedType()) {
12033 Size = CGF.getTypeSize(CVD->getType());
12034 // Align the size: ((size + align - 1) / align) * align
12035 Size = CGF.Builder.CreateNUWAdd(
12036 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12037 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12038 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12039 } else {
12040 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12041 Size = CGM.getSize(Sz.alignTo(Align));
12042 }
12043 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12044 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12045 const Expr *Allocator = AA->getAllocator();
12046 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12047 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
12048 SmallVector<llvm::Value *, 4> Args;
12049 Args.push_back(ThreadID);
12050 if (Alignment)
12051 Args.push_back(Alignment);
12052 Args.push_back(Size);
12053 Args.push_back(AllocVal);
12054 llvm::omp::RuntimeFunction FnID =
12055 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12056 llvm::Value *Addr = CGF.EmitRuntimeCall(
12057 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12058 getName({CVD->getName(), ".void.addr"}));
12059 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12060 CGM.getModule(), OMPRTL___kmpc_free);
12061 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12062 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12063 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12064 if (UntiedAddr.isValid())
12065 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12066
12067 // Cleanup action for allocate support.
12068 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12069 llvm::FunctionCallee RTLFn;
12070 SourceLocation::UIntTy LocEncoding;
12071 Address Addr;
12072 const Expr *AllocExpr;
12073
12074 public:
12075 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12076 SourceLocation::UIntTy LocEncoding, Address Addr,
12077 const Expr *AllocExpr)
12078 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12079 AllocExpr(AllocExpr) {}
12080 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12081 if (!CGF.HaveInsertPoint())
12082 return;
12083 llvm::Value *Args[3];
12084 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12085 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12086 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12087 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
12088 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12089 Args[2] = AllocVal;
12090 CGF.EmitRuntimeCall(RTLFn, Args);
12091 }
12092 };
12093 Address VDAddr =
12094 UntiedRealAddr.isValid()
12095 ? UntiedRealAddr
12096 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12097 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12098 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12099 VDAddr, Allocator);
12100 if (UntiedRealAddr.isValid())
12101 if (auto *Region =
12102 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12103 Region->emitUntiedSwitch(CGF);
12104 return VDAddr;
12105 }
12106 return UntiedAddr;
12107}
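// Illustrative source (not part of this file): a declaration such as
//
//   double buf[64];
//   #pragma omp allocate(buf) allocator(omp_large_cap_mem_alloc) align(64)
//
// takes the path above: the size is rounded up to the alignment via
// ((size + align - 1) / align) * align, storage comes from
// __kmpc_aligned_alloc (or __kmpc_alloc when no 'align' clause is present),
// and the pushed cleanup returns it through __kmpc_free on scope exit.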
12108
12109bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12110 const VarDecl *VD) const {
12111 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12112 if (It == FunctionToUntiedTaskStackMap.end())
12113 return false;
12114 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12115}
12116
12117CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12118 CodeGenModule &CGM, const OMPLoopDirective &S)
12119 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12120 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12121 if (!NeedToPush)
12122 return;
12123 NontemporalDeclsSet &DS =
12124 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12125 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12126 for (const Stmt *Ref : C->private_refs()) {
12127 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12128 const ValueDecl *VD;
12129 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12130 VD = DRE->getDecl();
12131 } else {
12132 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12133 assert((ME->isImplicitCXXThis() ||
12134 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12135 "Expected member of current class.");
12136 VD = ME->getMemberDecl();
12137 }
12138 DS.insert(VD);
12139 }
12140 }
12141}
12142
12143CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12144 if (!NeedToPush)
12145 return;
12146 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12147}
12148
12149CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12150 CodeGenFunction &CGF,
12151 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12152 std::pair<Address, Address>> &LocalVars)
12153 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12154 if (!NeedToPush)
12155 return;
12156 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12157 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12158 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12159}
12160
12161CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12162 if (!NeedToPush)
12163 return;
12164 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12165}
12166
12167bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12168 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12169
12170 return llvm::any_of(
12171 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12172 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12173}
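// Illustrative source (not part of this file):
//
//   #pragma omp simd nontemporal(a)
//   for (int i = 0; i < n; ++i)
//     a[i] += b[i];
//
// The RAII above keeps 'a' on NontemporalDeclsStack while the directive is
// emitted, and isNontemporalDecl lets the scalar load/store emitters attach
// !nontemporal metadata to accesses of 'a'.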
12174
12175void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12176 const OMPExecutableDirective &S,
12177 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12178 const {
12179 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12180 // Vars in target/task regions must be excluded completely.
12181 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12182 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12183 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12184 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12185 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12186 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12187 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12188 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12189 }
12190 }
12191 // Exclude vars in private clauses.
12192 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12193 for (const Expr *Ref : C->varlist()) {
12194 if (!Ref->getType()->isScalarType())
12195 continue;
12196 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12197 if (!DRE)
12198 continue;
12199 NeedToCheckForLPCs.insert(DRE->getDecl());
12200 }
12201 }
12202 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12203 for (const Expr *Ref : C->varlist()) {
12204 if (!Ref->getType()->isScalarType())
12205 continue;
12206 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12207 if (!DRE)
12208 continue;
12209 NeedToCheckForLPCs.insert(DRE->getDecl());
12210 }
12211 }
12212 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12213 for (const Expr *Ref : C->varlist()) {
12214 if (!Ref->getType()->isScalarType())
12215 continue;
12216 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12217 if (!DRE)
12218 continue;
12219 NeedToCheckForLPCs.insert(DRE->getDecl());
12220 }
12221 }
12222 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12223 for (const Expr *Ref : C->varlist()) {
12224 if (!Ref->getType()->isScalarType())
12225 continue;
12226 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12227 if (!DRE)
12228 continue;
12229 NeedToCheckForLPCs.insert(DRE->getDecl());
12230 }
12231 }
12232 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12233 for (const Expr *Ref : C->varlist()) {
12234 if (!Ref->getType()->isScalarType())
12235 continue;
12236 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12237 if (!DRE)
12238 continue;
12239 NeedToCheckForLPCs.insert(DRE->getDecl());
12240 }
12241 }
12242 for (const Decl *VD : NeedToCheckForLPCs) {
12243 for (const LastprivateConditionalData &Data :
12244 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12245 if (Data.DeclToUniqueName.count(VD) > 0) {
12246 if (!Data.Disabled)
12247 NeedToAddForLPCsAsDisabled.insert(VD);
12248 break;
12249 }
12250 }
12251 }
12252}
12253
12254CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12255 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12256 : CGM(CGF.CGM),
12257 Action((CGM.getLangOpts().OpenMP >= 50 &&
12258 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12259 [](const OMPLastprivateClause *C) {
12260 return C->getKind() ==
12261 OMPC_LASTPRIVATE_conditional;
12262 }))
12263 ? ActionToDo::PushAsLastprivateConditional
12264 : ActionToDo::DoNotPush) {
12265 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12266 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12267 return;
12268 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12269 "Expected a push action.");
12270 LastprivateConditionalData &Data =
12271 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12272 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12273 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12274 continue;
12275
12276 for (const Expr *Ref : C->varlist()) {
12277 Data.DeclToUniqueName.insert(std::make_pair(
12278 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12279 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12280 }
12281 }
12282 Data.IVLVal = IVLVal;
12283 Data.Fn = CGF.CurFn;
12284}
12285
12286CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12287 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12288 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12289 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12290 if (CGM.getLangOpts().OpenMP < 50)
12291 return;
12292 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12293 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12294 if (!NeedToAddForLPCsAsDisabled.empty()) {
12295 Action = ActionToDo::DisableLastprivateConditional;
12296 LastprivateConditionalData &Data =
12297 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12298 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12299 Data.DeclToUniqueName.try_emplace(VD);
12300 Data.Fn = CGF.CurFn;
12301 Data.Disabled = true;
12302 }
12303}
12304
12305CGOpenMPRuntime::LastprivateConditionalRAII
12306CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12307 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12308 return LastprivateConditionalRAII(CGF, S);
12309}
12310
12311CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12312 if (CGM.getLangOpts().OpenMP < 50)
12313 return;
12314 if (Action == ActionToDo::DisableLastprivateConditional) {
12315 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12316 "Expected list of disabled private vars.");
12317 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12318 }
12319 if (Action == ActionToDo::PushAsLastprivateConditional) {
12320 assert(
12321 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12322 "Expected list of lastprivate conditional vars.");
12323 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12324 }
12325}
12326
12327Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12328 const VarDecl *VD) {
12329 ASTContext &C = CGM.getContext();
12330 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12331 QualType NewType;
12332 const FieldDecl *VDField;
12333 const FieldDecl *FiredField;
12334 LValue BaseLVal;
12335 auto VI = I->getSecond().find(VD);
12336 if (VI == I->getSecond().end()) {
12337 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
12338 RD->startDefinition();
12339 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12340 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12341 RD->completeDefinition();
12342 NewType = C.getCanonicalTagType(RD);
12343 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12344 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12345 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12346 } else {
12347 NewType = std::get<0>(VI->getSecond());
12348 VDField = std::get<1>(VI->getSecond());
12349 FiredField = std::get<2>(VI->getSecond());
12350 BaseLVal = std::get<3>(VI->getSecond());
12351 }
12352 LValue FiredLVal =
12353 CGF.EmitLValueForField(BaseLVal, FiredField);
12354 CGF.EmitStoreOfScalar(
12355 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12356 FiredLVal);
12357 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
12358}
12359
12360namespace {
12361/// Checks if the lastprivate conditional variable is referenced in LHS.
12362class LastprivateConditionalRefChecker final
12363 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12364 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12365 const Expr *FoundE = nullptr;
12366 const Decl *FoundD = nullptr;
12367 StringRef UniqueDeclName;
12368 LValue IVLVal;
12369 llvm::Function *FoundFn = nullptr;
12370 SourceLocation Loc;
12371
12372public:
12373 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12374 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12375 llvm::reverse(LPM)) {
12376 auto It = D.DeclToUniqueName.find(E->getDecl());
12377 if (It == D.DeclToUniqueName.end())
12378 continue;
12379 if (D.Disabled)
12380 return false;
12381 FoundE = E;
12382 FoundD = E->getDecl()->getCanonicalDecl();
12383 UniqueDeclName = It->second;
12384 IVLVal = D.IVLVal;
12385 FoundFn = D.Fn;
12386 break;
12387 }
12388 return FoundE == E;
12389 }
12390 bool VisitMemberExpr(const MemberExpr *E) {
12391 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12392 return false;
12393 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12394 llvm::reverse(LPM)) {
12395 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12396 if (It == D.DeclToUniqueName.end())
12397 continue;
12398 if (D.Disabled)
12399 return false;
12400 FoundE = E;
12401 FoundD = E->getMemberDecl()->getCanonicalDecl();
12402 UniqueDeclName = It->second;
12403 IVLVal = D.IVLVal;
12404 FoundFn = D.Fn;
12405 break;
12406 }
12407 return FoundE == E;
12408 }
12409 bool VisitStmt(const Stmt *S) {
12410 for (const Stmt *Child : S->children()) {
12411 if (!Child)
12412 continue;
12413 if (const auto *E = dyn_cast<Expr>(Child))
12414 if (!E->isGLValue())
12415 continue;
12416 if (Visit(Child))
12417 return true;
12418 }
12419 return false;
12420 }
12421 explicit LastprivateConditionalRefChecker(
12422 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12423 : LPM(LPM) {}
12424 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12425 getFoundData() const {
12426 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12427 }
12428};
12429} // namespace
12430
12431void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12432 LValue IVLVal,
12433 StringRef UniqueDeclName,
12434 LValue LVal,
12435 SourceLocation Loc) {
12436 // Last updated loop counter for the lastprivate conditional var.
12437 // int<xx> last_iv = 0;
12438 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12439 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12440 LLIVTy, getName({UniqueDeclName, "iv"}));
12441 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12442 IVLVal.getAlignment().getAsAlign());
12443 LValue LastIVLVal =
12444 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
12445
12446 // Last value of the lastprivate conditional.
12447 // decltype(priv_a) last_a;
12448 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12449 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12450 cast<llvm::GlobalVariable>(Last)->setAlignment(
12451 LVal.getAlignment().getAsAlign());
12452 LValue LastLVal =
12453 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12454
12455 // Global loop counter. Required to handle inner parallel-for regions.
12456 // iv
12457 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12458
12459 // #pragma omp critical(a)
12460 // if (last_iv <= iv) {
12461 // last_iv = iv;
12462 // last_a = priv_a;
12463 // }
12464 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12465 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12466 Action.Enter(CGF);
12467 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12468 // If (last_iv <= iv), the variable was updated on this or a later
12469 // iteration: store the new value in the global var.
12470 llvm::Value *CmpRes;
12471 if (IVLVal.getType()->isSignedIntegerType()) {
12472 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12473 } else {
12474 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12475 "Loop iteration variable must be integer.");
12476 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12477 }
12478 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12479 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12480 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12481 // {
12482 CGF.EmitBlock(ThenBB);
12483
12484 // last_iv = iv;
12485 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12486
12487 // last_a = priv_a;
12488 switch (CGF.getEvaluationKind(LVal.getType())) {
12489 case TEK_Scalar: {
12490 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12491 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12492 break;
12493 }
12494 case TEK_Complex: {
12495 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12496 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12497 break;
12498 }
12499 case TEK_Aggregate:
12500 llvm_unreachable(
12501 "Aggregates are not supported in lastprivate conditional.");
12502 }
12503 // }
12504 CGF.EmitBranch(ExitBB);
12505 // There is no need to emit line number for unconditional branch.
12506 (void)ApplyDebugLocation::CreateEmpty(CGF);
12507 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12508 };
12509
12510 if (CGM.getLangOpts().OpenMPSimd) {
12511 // Do not emit as a critical region as no parallel region could be emitted.
12512 RegionCodeGenTy ThenRCG(CodeGen);
12513 ThenRCG(CGF);
12514 } else {
12515 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12516 }
12517}
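// Illustrative source (not part of this file):
//
//   #pragma omp parallel for lastprivate(conditional : x)
//   for (int i = 0; i < n; ++i)
//     if (p[i]) x = i;
//
// Every guarded store to 'x' funnels through the update above: inside a
// critical section keyed by the variable's unique name, the last_iv/last_a
// globals are refreshed whenever the current iteration is not older than the
// stored one, so the sequentially last assignment wins.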
12518
12519void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12520 const Expr *LHS) {
12521 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12522 return;
12523 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12524 if (!Checker.Visit(LHS))
12525 return;
12526 const Expr *FoundE;
12527 const Decl *FoundD;
12528 StringRef UniqueDeclName;
12529 LValue IVLVal;
12530 llvm::Function *FoundFn;
12531 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12532 Checker.getFoundData();
12533 if (FoundFn != CGF.CurFn) {
12534 // Special codegen for inner parallel regions.
12535 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12536 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12537 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12538 "Lastprivate conditional is not found in outer region.");
12539 QualType StructTy = std::get<0>(It->getSecond());
12540 const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
12541 LValue PrivLVal = CGF.EmitLValue(FoundE);
12542 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12543 PrivLVal.getAddress(),
12544 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12545 CGF.ConvertTypeForMem(StructTy));
12546 LValue BaseLVal =
12547 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12548 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12549 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12550 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12551 FiredLVal, llvm::AtomicOrdering::Unordered,
12552 /*IsVolatile=*/true, /*isInit=*/false);
12553 return;
12554 }
12555
12556 // Private address of the lastprivate conditional in the current context.
12557 // priv_a
12558 LValue LVal = CGF.EmitLValue(FoundE);
12559 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12560 FoundE->getExprLoc());
12561}
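// Illustrative source (not part of this file): when the tracked assignment
// happens in a function outlined for an inner region (FoundFn != CGF.CurFn),
// e.g.
//
//   #pragma omp for lastprivate(conditional : x)
//   for (int i = 0; i < n; ++i) {
//     #pragma omp parallel
//     if (q(i)) x = i;
//   }
//
// the outer region's globals cannot be updated directly, so the code above
// atomically raises the 'Fired' flag in the wrapper struct; the outer region
// later checks it in checkAndEmitSharedLastprivateConditional.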
12562
12563void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12564 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12565 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12566 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12567 return;
12568 auto Range = llvm::reverse(LastprivateConditionalStack);
12569 auto It = llvm::find_if(
12570 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12571 if (It == Range.end() || It->Fn != CGF.CurFn)
12572 return;
12573 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12574 assert(LPCI != LastprivateConditionalToTypes.end() &&
12575 "Lastprivates must be registered already.");
12576 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12577 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12578 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12579 for (const auto &Pair : It->DeclToUniqueName) {
12580 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12581 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12582 continue;
12583 auto I = LPCI->getSecond().find(Pair.first);
12584 assert(I != LPCI->getSecond().end() &&
12585 "Lastprivate must be rehistered already.");
12586 // bool Cmp = priv_a.Fired != 0;
12587 LValue BaseLVal = std::get<3>(I->getSecond());
12588 LValue FiredLVal =
12589 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12590 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12591 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12592 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12593 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12594 // if (Cmp) {
12595 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12596 CGF.EmitBlock(ThenBB);
12597 Address Addr = CGF.GetAddrOfLocalVar(VD);
12598 LValue LVal;
12599 if (VD->getType()->isReferenceType())
12600 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12601 AlignmentSource::Decl);
12602 else
12603 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12604 AlignmentSource::Decl);
12605 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12606 D.getBeginLoc());
12607 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12608 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
12609 // }
12610 }
12611}
12612
12613void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12614 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12615 SourceLocation Loc) {
12616 if (CGF.getLangOpts().OpenMP < 50)
12617 return;
12618 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12619 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12620 "Unknown lastprivate conditional variable.");
12621 StringRef UniqueName = It->second;
12622 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12623 // The variable was not updated in the region - exit.
12624 if (!GV)
12625 return;
12626 LValue LPLVal = CGF.MakeRawAddrLValue(
12627 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12628 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12629 CGF.EmitStoreOfScalar(Res, PrivLVal);
12630}
12631
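// The CGOpenMPSIMDRuntime overrides below back the -fopenmp-simd mode, in
// which only 'simd' semantics are honored and no libomp entry points may be
// emitted; every operation that would need the runtime is therefore an
// llvm_unreachable stub.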
12632llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12633 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12634 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12635 const RegionCodeGenTy &CodeGen) {
12636 llvm_unreachable("Not supported in SIMD-only mode");
12637}
12638
12639llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12640 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12641 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12642 const RegionCodeGenTy &CodeGen) {
12643 llvm_unreachable("Not supported in SIMD-only mode");
12644}
12645
12646llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12647 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12648 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12649 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12650 bool Tied, unsigned &NumberOfParts) {
12651 llvm_unreachable("Not supported in SIMD-only mode");
12652}
12653
12654void CGOpenMPSIMDRuntime::emitParallelCall(
12655 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
12656 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
12657 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
12658 OpenMPSeverityClauseKind Severity, const Expr *Message) {
12659 llvm_unreachable("Not supported in SIMD-only mode");
12660}
12661
12662void CGOpenMPSIMDRuntime::emitCriticalRegion(
12663 CodeGenFunction &CGF, StringRef CriticalName,
12664 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12665 const Expr *Hint) {
12666 llvm_unreachable("Not supported in SIMD-only mode");
12667}
12668
12669void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12670 const RegionCodeGenTy &MasterOpGen,
12671 SourceLocation Loc) {
12672 llvm_unreachable("Not supported in SIMD-only mode");
12673}
12674
12675void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12676 const RegionCodeGenTy &MasterOpGen,
12677 SourceLocation Loc,
12678 const Expr *Filter) {
12679 llvm_unreachable("Not supported in SIMD-only mode");
12680}
12681
12682void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12683 SourceLocation Loc) {
12684 llvm_unreachable("Not supported in SIMD-only mode");
12685}
12686
12687void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12688 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12689 SourceLocation Loc) {
12690 llvm_unreachable("Not supported in SIMD-only mode");
12691}
12692
12693void CGOpenMPSIMDRuntime::emitSingleRegion(
12694 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12695 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12696 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12697 ArrayRef<const Expr *> AssignmentOps) {
12698 llvm_unreachable("Not supported in SIMD-only mode");
12699}
12700
12701void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12702 const RegionCodeGenTy &OrderedOpGen,
12703 SourceLocation Loc,
12704 bool IsThreads) {
12705 llvm_unreachable("Not supported in SIMD-only mode");
12706}
12707
12708void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12709 SourceLocation Loc,
12710 OpenMPDirectiveKind Kind,
12711 bool EmitChecks,
12712 bool ForceSimpleCall) {
12713 llvm_unreachable("Not supported in SIMD-only mode");
12714}
12715
12716void CGOpenMPSIMDRuntime::emitForDispatchInit(
12717 CodeGenFunction &CGF, SourceLocation Loc,
12718 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12719 bool Ordered, const DispatchRTInput &DispatchValues) {
12720 llvm_unreachable("Not supported in SIMD-only mode");
12721}
12722
12723void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
12724 SourceLocation Loc) {
12725 llvm_unreachable("Not supported in SIMD-only mode");
12726}
12727
12728void CGOpenMPSIMDRuntime::emitForStaticInit(
12729 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12730 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12731 llvm_unreachable("Not supported in SIMD-only mode");
12732}
12733
12734void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12735 CodeGenFunction &CGF, SourceLocation Loc,
12736 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12737 llvm_unreachable("Not supported in SIMD-only mode");
12738}
12739
12740void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12741 SourceLocation Loc,
12742 unsigned IVSize,
12743 bool IVSigned) {
12744 llvm_unreachable("Not supported in SIMD-only mode");
12745}
12746
12747void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12748 SourceLocation Loc,
12749 OpenMPDirectiveKind DKind) {
12750 llvm_unreachable("Not supported in SIMD-only mode");
12751}
12752
12753llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12754 SourceLocation Loc,
12755 unsigned IVSize, bool IVSigned,
12756 Address IL, Address LB,
12757 Address UB, Address ST) {
12758 llvm_unreachable("Not supported in SIMD-only mode");
12759}
12760
12761void CGOpenMPSIMDRuntime::emitNumThreadsClause(
12762 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
12763 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
12764 SourceLocation SeverityLoc, const Expr *Message,
12765 SourceLocation MessageLoc) {
12766 llvm_unreachable("Not supported in SIMD-only mode");
12767}
12768
12769void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12770 ProcBindKind ProcBind,
12771 SourceLocation Loc) {
12772 llvm_unreachable("Not supported in SIMD-only mode");
12773}
12774
12775Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12776 const VarDecl *VD,
12777 Address VDAddr,
12778 SourceLocation Loc) {
12779 llvm_unreachable("Not supported in SIMD-only mode");
12780}
12781
12782llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12783 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12784 CodeGenFunction *CGF) {
12785 llvm_unreachable("Not supported in SIMD-only mode");
12786}
12787
12788Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12789 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12790 llvm_unreachable("Not supported in SIMD-only mode");
12791}
12792
12793void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12794 ArrayRef<const Expr *> Vars,
12795 SourceLocation Loc,
12796 llvm::AtomicOrdering AO) {
12797 llvm_unreachable("Not supported in SIMD-only mode");
12798}
12799
12800void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12801 const OMPExecutableDirective &D,
12802 llvm::Function *TaskFunction,
12803 QualType SharedsTy, Address Shareds,
12804 const Expr *IfCond,
12805 const OMPTaskDataTy &Data) {
12806 llvm_unreachable("Not supported in SIMD-only mode");
12807}
12808
12809void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12810 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12811 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12812 const Expr *IfCond, const OMPTaskDataTy &Data) {
12813 llvm_unreachable("Not supported in SIMD-only mode");
12814}
12815
12816void CGOpenMPSIMDRuntime::emitReduction(
12817 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12818 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12819 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12820 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12821 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12822 ReductionOps, Options);
12823}
12824
12825llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12826 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12827 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12828 llvm_unreachable("Not supported in SIMD-only mode");
12829}
12830
12831void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12832 SourceLocation Loc,
12833 bool IsWorksharingReduction) {
12834 llvm_unreachable("Not supported in SIMD-only mode");
12835}
12836
12837void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12838 SourceLocation Loc,
12839 ReductionCodeGen &RCG,
12840 unsigned N) {
12841 llvm_unreachable("Not supported in SIMD-only mode");
12842}
12843
12844Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12845 SourceLocation Loc,
12846 llvm::Value *ReductionsPtr,
12847 LValue SharedLVal) {
12848 llvm_unreachable("Not supported in SIMD-only mode");
12849}
12850
12851void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12852 SourceLocation Loc,
12853 const OMPTaskDataTy &Data) {
12854 llvm_unreachable("Not supported in SIMD-only mode");
12855}
12856
12857void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12858 CodeGenFunction &CGF, SourceLocation Loc,
12859 OpenMPDirectiveKind CancelRegion) {
12860 llvm_unreachable("Not supported in SIMD-only mode");
12861}
12862
12863void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12864 SourceLocation Loc, const Expr *IfCond,
12865 OpenMPDirectiveKind CancelRegion) {
12866 llvm_unreachable("Not supported in SIMD-only mode");
12867}
12868
12869void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12870 const OMPExecutableDirective &D, StringRef ParentName,
12871 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12872 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12873 llvm_unreachable("Not supported in SIMD-only mode");
12874}
12875
12876void CGOpenMPSIMDRuntime::emitTargetCall(
12877 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12878 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12879 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12880 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12881 const OMPLoopDirective &D)>
12882 SizeEmitter) {
12883 llvm_unreachable("Not supported in SIMD-only mode");
12884}
12885
12886bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12887 llvm_unreachable("Not supported in SIMD-only mode");
12888}
12889
12890bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12891 llvm_unreachable("Not supported in SIMD-only mode");
12892}
12893
12894bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12895 return false;
12896}
12897
12898void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12899 const OMPExecutableDirective &D,
12900 SourceLocation Loc,
12901 llvm::Function *OutlinedFn,
12902 ArrayRef<llvm::Value *> CapturedVars) {
12903 llvm_unreachable("Not supported in SIMD-only mode");
12904}
12905
12906void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12907 const Expr *NumTeams,
12908 const Expr *ThreadLimit,
12909 SourceLocation Loc) {
12910 llvm_unreachable("Not supported in SIMD-only mode");
12911}
12912
12913void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12914 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12915 const Expr *Device, const RegionCodeGenTy &CodeGen,
12916 CGOpenMPRuntime::TargetDataInfo &Info) {
12917 llvm_unreachable("Not supported in SIMD-only mode");
12918}
12919
12920void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12921 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12922 const Expr *Device) {
12923 llvm_unreachable("Not supported in SIMD-only mode");
12924}
12925
12926void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12927 const OMPLoopDirective &D,
12928 ArrayRef<Expr *> NumIterations) {
12929 llvm_unreachable("Not supported in SIMD-only mode");
12930}
12931
12932void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12933 const OMPDependClause *C) {
12934 llvm_unreachable("Not supported in SIMD-only mode");
12935}
12936
12937void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12938 const OMPDoacrossClause *C) {
12939 llvm_unreachable("Not supported in SIMD-only mode");
12940}
12941
12942const VarDecl *
12943CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12944 const VarDecl *NativeParam) const {
12945 llvm_unreachable("Not supported in SIMD-only mode");
12946}
12947
12948Address
12949CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12950 const VarDecl *NativeParam,
12951 const VarDecl *TargetParam) const {
12952 llvm_unreachable("Not supported in SIMD-only mode");
12953}
#define V(N, I)
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static std::pair< llvm::Value *, OMPDynGroupprivateFallbackType > emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
Expr::Classification Cl
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:220
SourceManager & getSourceManager()
Definition ASTContext.h:837
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:930
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static bool hasSameType(QualType T1, QualType T2)
Determine whether the given types T1 and T2 are equivalent.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5266
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3722
Attr - This represents one attribute.
Definition Attr.h:45
Represents a C++ constructor within a class.
Definition DeclCXX.h:2604
Represents a C++ destructor within a class.
Definition DeclCXX.h:2869
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2255
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2279
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1784
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1097
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2121
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3878
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3912
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1353
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:3918
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3906
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3909
This captures a statement into a function.
Definition Stmt.h:3865
const Capture * const_capture_iterator
Definition Stmt.h:3999
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4016
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:3986
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:3969
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1479
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4011
capture_range captures()
Definition Stmt.h:4003
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:140
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:296
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:207
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:245
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:112
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:369
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:282
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:193
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits the address of the word in memory where the current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
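For reference, a hedged sketch of the __kmpc_dispatch_next entry points this wraps, selected by IVSize/IVSigned; signatures paraphrased from the LLVM OpenMP runtime's kmp.h, with kmp_int32/kmp_int64 spelled as fixed-width types (unsigned _4u/_8u variants exist as well):

#include <cstdint>
struct ident_t; // source-location record; see emitUpdateLocation

extern "C" {
// Returns nonzero while another chunk is available; writes the chunk bounds,
// stride, and the "last iteration" flag through the out-parameters.
int32_t __kmpc_dispatch_next_4(ident_t *loc, int32_t gtid, int32_t *p_last,
                               int32_t *p_lb, int32_t *p_ub, int32_t *p_st);
int32_t __kmpc_dispatch_next_8(ident_t *loc, int32_t gtid, int32_t *p_last,
                               int64_t *p_lb, int64_t *p_ub, int64_t *p_st);
}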
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits an object of ident_t type with info for the source location.
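A hedged sketch of the ident_t record this populates, following the layout in the LLVM OpenMP runtime's kmp.h (kmp_int32 spelled as int32_t here):

#include <cstdint>
struct ident_t {
  int32_t reserved_1;  // reserved
  int32_t flags;       // KMP_IDENT_* bits (the Flags parameter above)
  int32_t reserved_2;  // see getDefaultLocationReserved2Flags()
  int32_t reserved_3;  // reserved
  const char *psource; // ";file;function;line;column;;" location string
};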
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit code for the initialization of a threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /**< pointer to block of pointers to shared vars */ k...
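The truncated comment refers to the following layout; a hedged reconstruction from the full doc comment in CGOpenMPRuntime.h, with kmp_int32 spelled as int32_t:

#include <cstdint>
typedef int32_t (*kmp_routine_entry_t)(int32_t, void *);
struct kmp_task_t {
  void *shareds;                   // pointer to block of pointers to shared vars
  kmp_routine_entry_t routine;     // routine to call for executing the task
  int32_t part_id;                 // part id for the task
  kmp_routine_entry_t destructors; // invokes destructors of firstprivate C++ objects
};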
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument based on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
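The full shape behind the truncated comment, reconstructed (hedged) from the doc comment in CGOpenMPRuntime.h, with kmp_intptr_t/kmp_int32 spelled as standard types:

#include <cstddef>
#include <cstdint>
struct kmp_task_affinity_info_t {
  intptr_t base_addr; // start of the affinity range
  size_t len;         // length of the range in bytes
  struct {
    bool flag1 : 1;
    bool flag2 : 1;
    int32_t reserved : 30;
  } flags;
};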
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit code for the initialization of the task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with the constructs 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with the current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
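Illustration only: a sketch of the control-flow shape emitIfClause produces, with ThenGen/ElseGen standing in for the two RegionCodeGenTy callbacks; when the condition folds to a constant, only the live arm is emitted at all:

void ifClauseShape(bool Cond, void (*ThenGen)(), void (*ElseGen)()) {
  if (Cond)
    ThenGen(); // e.g. the real parallel call
  else
    ElseGen(); // e.g. the serialized fallback
}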
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc)
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before the start of the loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for the 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
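The truncated struct reads in full as follows (hedged reconstruction from the doc comment, kmp_int64 spelled as int64_t):

#include <cstdint>
struct kmp_dim { // loop bounds info, cast to kmp_int64
  int64_t lo; // lower bound
  int64_t up; // upper bound
  int64_t st; // stride
};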
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit code for the reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
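A hedged reconstruction of the truncated kmp_depend_info layout from the doc comment (the runtime defines further flag bits beyond in/out, e.g. for mutexinoutset dependencies):

#include <cstddef>
#include <cstdint>
struct kmp_depend_info_t {
  intptr_t base_addr; // address of the dependency object
  size_t len;         // length in bytes
  struct {
    bool in : 1;  // 'in' dependence
    bool out : 1; // 'out' / 'inout' dependence
  } flags;
};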
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
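A hedged sketch of the shape of the generated reducer (the name is illustrative; the real function is internal and mangled): both parameters point to arrays of pointers to the reduction items, and the ReductionOps combine RHS elements into LHS elements in place:

extern "C" void omp_reduction_func(void *LHSArrayPtr, void *RHSArrayPtr) {
  void **LHS = static_cast<void **>(LHSArrayPtr);
  void **RHS = static_cast<void **>(RHSArrayPtr);
  // For each reduction item i, with item type T and combiner 'op':
  //   *static_cast<T *>(LHS[i]) = op(*static_cast<T *>(LHS[i]),
  //                                  *static_cast<T *>(RHS[i]));
  (void)LHS;
  (void)RHS;
}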
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before the start of the loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before the start of the loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for the initialization of a threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit code for the reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with the current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for the initialization of the task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for the 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3127
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2394
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1482
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2278
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3136
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2251
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5358
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:176
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:244
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2377
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:4936
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:225
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5532
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2202
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2574
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3146
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:295
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1552
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:672
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:188
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:203
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1610
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitMemberExpr(const MemberExpr *E)
Definition CGExpr.cpp:5168
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1668
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1807
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:652
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2092
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2944
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition CGCall.cpp:1702
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:740
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:639
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375
LValue - This represents an lvalue reference.
Definition CGValue.h:183
CharUnits getAlignment() const
Definition CGValue.h:344
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:339
Address getAddress() const
Definition CGValue.h:362
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:347
QualType getType() const
Definition CGValue.h:292
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:336
A basic class for pre- and post-actions in an advanced codegen sequence for an OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:99
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:109
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:72
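A trivial round trip, assuming CGF is a CodeGenFunction:

  RValue RV = RValue::get(CGF.Builder.getInt32(42)); // wrap a scalar value
  llvm::Value *V = RV.getScalarVal();                // recover the underlying i32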
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the reference expression for the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variably-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr for use in place of the original variable address in normal operations.
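A hedged sketch of the usual driving loop over reduction items, assuming the Shareds/Privates/ReductionOps expression arrays and the per-item PrivateAddr/SharedAddr are already set up (all names illustrative):

  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  for (unsigned N = 0, E = Shareds.size(); N < E; ++N) {
    RedCG.emitSharedOrigLValue(CGF, N); // lvalues for shared/original items
    RedCG.emitAggregateType(CGF, N);    // sizes, if variably modified
    RedCG.emitInitialization(CGF, N, PrivateAddr, SharedAddr,
                             [](CodeGenFunction &) { return false; });
  }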
Class that provides a way to call the simple version of codegen for an OpenMP region, or an advanced one with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
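A sketch of the shape such callbacks take; the body comment stands in for real emission:

  auto &&CodeGen = [](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF); // run the pre-action, if one was installed
    // ... emit the region body ...
  };
  RegionCodeGenTy RCG(CodeGen);
  RCG(CGF); // simple invocation; setAction() would arm Enter/Exit around it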
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1449
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1270
ValueDecl * getDecl()
Definition Expr.h:1338
Decl - This represents one declaration (or definition), e.g. a variable, typedef, function, struct, etc.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:573
bool hasAttrs() const
Definition DeclBase.h:518
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:546
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition DeclBase.h:1087
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:559
SourceLocation getLocation() const
Definition DeclBase.h:439
DeclContext * getDeclContext()
Definition DeclBase.h:448
AttrVec & getAttrs()
Definition DeclBase.h:524
bool hasAttr() const
Definition DeclBase.h:577
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:978
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition Diagnostic.h:905
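A sketch of the reporting pattern, assuming CGM, a SourceLocation Loc, and a decl VD are in scope (format string invented):

  unsigned DiagID = CGM.getDiags().getCustomDiagID(
      DiagnosticsEngine::Error, "cannot emit code for '%0' here");
  CGM.getDiags().Report(Loc, DiagID) << VD->getName();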
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
Expr * IgnoreParenNoopCasts(const ASTContext &Ctx) LLVM_READONLY
Skip past any parentheses and casts which do not change the value (including ptr->int casts of the sa...
Definition Expr.cpp:3113
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:674
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:672
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3091
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3086
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
isIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3666
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
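A sketch of the fold-or-branch decision this enables, assuming Cond is a clause's condition expression:

  bool CondConstant;
  if (Cond->EvaluateAsBooleanCondition(CondConstant, CGF.getContext())) {
    // constant condition: emit only the arm selected by CondConstant
  } else {
    // otherwise evaluate Cond at runtime and branch on the result
  }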
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:273
static bool isSameComparisonOperand(const Expr *E1, const Expr *E2)
Checks that the two Expr's will refer to the same value as a comparison operand.
Definition Expr.cpp:4295
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4031
Represents a member of a struct/union/class.
Definition Decl.h:3160
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition Decl.h:3245
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3396
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4696
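A hedged sketch of synthesizing a record with a single int field, assuming C is an ASTContext (record name invented):

  RecordDecl *RD = C.buildImplicitRecord("example.privates.t");
  RD->startDefinition();
  FieldDecl *FD = FieldDecl::Create(
      C, RD, SourceLocation(), SourceLocation(), /*Id=*/nullptr, C.IntTy,
      C.getTrivialTypeSourceInfo(C.IntTy), /*BW=*/nullptr, /*Mutable=*/false,
      ICIS_NoInit);
  FD->setAccess(AS_public);
  RD->addDecl(FD);
  RD->completeDefinition();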
Represents a function declaration or definition.
Definition Decl.h:2000
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2797
QualType getReturnType() const
Definition Decl.h:2845
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2774
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3743
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3822
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5529
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:971
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3298
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3381
Expr * getBase() const
Definition Expr.h:3375
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
bool isExternallyVisible() const
Definition Decl.h:433
This represents clause 'affinity' in the '#pragma omp task'-based directives.
ArrayRef< MappableComponent > MappableExprComponentListRef
static std::pair< const Expr *, std::optional< size_t > > findAttachPtrExpr(MappableExprComponentListRef Components, OpenMPDirectiveKind CurDirKind)
Find the attach pointer expression from a list of mappable expression components.
static QualType getComponentExprElementType(const Expr *Exp)
Get the type of an element of a ComponentList Expr Exp.
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing single OpenMP clause.
This represents '#pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents '#pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
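An illustrative source-level counterpart (requires <climits> for INT_MIN): the combiner expression references omp_in/omp_out (getCombinerIn/getCombinerOut), and the initializer references omp_priv/omp_orig (getInitPriv/getInitOrig):

  #pragma omp declare reduction(maxi : int : omp_out = omp_in > omp_out ? omp_in : omp_out) \
      initializer(omp_priv = INT_MIN)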
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This represents 'dyn_groupprivate' clause in '#pragma omp target ...' and '#pragma omp teams ....
This represents 'if' clause in the '#pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
This represents clause 'in_reduction' in the '#pragma omp task' directives.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5474
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents 'threadset' clause in the '#pragma omp task ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1178
Represents a parameter to a function.
Definition Decl.h:1790
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3328
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1172
QualType withRestrict() const
Definition TypeBase.h:1175
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8278
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8318
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8463
QualType getCanonicalType() const
Definition TypeBase.h:8330
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1545
Represents a struct/union/class.
Definition Decl.h:4321
field_iterator field_end() const
Definition Decl.h:4527
field_range fields() const
Definition Decl.h:4524
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5225
bool field_empty() const
Definition Decl.h:4532
field_iterator field_begin() const
Definition Decl.cpp:5209
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
Stmt - This represents one statement.
Definition Stmt.h:85
child_range children()
Definition Stmt.cpp:299
StmtClass getStmtClass() const
Definition Stmt.h:1472
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:338
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition Stmt.cpp:205
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:350
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4902
bool isUnion() const
Definition Decl.h:3922
The base class of the type hierarchy.
Definition TypeBase.h:1833
bool isVoidType() const
Definition TypeBase.h:8871
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition TypeBase.h:9051
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2205
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8614
bool isPointerType() const
Definition TypeBase.h:8515
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8915
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9158
bool isReferenceType() const
Definition TypeBase.h:8539
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:752
bool isLValueReferenceType() const
Definition TypeBase.h:8543
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2411
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3119
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9044
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2800
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9144
bool isFloatingType() const
Definition Type.cpp:2304
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2253
bool isAnyPointerType() const
Definition TypeBase.h:8523
const T * getAs() const
Member-template getAs<specific type>'.
Definition TypeBase.h:9091
bool isRecordType() const
Definition TypeBase.h:8642
bool isUnionType() const
Definition Type.cpp:718
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:926
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2264
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2373
const Expr * getInit() const
Definition Decl.h:1368
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1217
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1295
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2382
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1262
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1358
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:3966
Expr * getSizeExpr() const
Definition TypeBase.h:3980
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:155
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:146
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
ComparisonResult
Indicates the result of a tentative comparison.
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
bool isa(CodeGen::Address addr)
Definition Address.h:330
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
@ Conditional
A conditional (?:) operator.
Definition Sema.h:667
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:124
This class represents a compute construct, representing a 'Kind' of 'parallel', 'serial',...
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:5886
bool isOpenMPTargetMapEnteringDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a map-entering target directive.
@ Type
The name was classified as a type.
Definition Sema.h:562
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:139
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1746
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPThreadsetKind
OpenMP modifiers for 'threadset' clause.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, necessary to generate the stati...
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
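A hedged sketch of populating this struct for a 32-bit signed induction variable, assuming the IL/LB/UB/ST output addresses described above are allocated and RT is the configured OpenMP runtime (call shape illustrative):

  CGOpenMPRuntime::StaticRTInput StaticInit(
      /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL, LB, UB, ST,
      /*Chunk=*/nullptr); // nullptr: no chunk given on the schedule clause
  RT.emitForStaticInit(CGF, Loc, OMPD_for, ScheduleKind, StaticInit);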
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:645
Extra information about a function prototype.
Definition TypeBase.h:5339
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition ExprOpenMP.h:121
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Scheduling data for loop-based OpenMP directives.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
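A minimal sketch of filling the descriptor for a plain static schedule (values illustrative):

  OpenMPScheduleTy ScheduleKind;
  ScheduleKind.Schedule = OMPC_SCHEDULE_static;
  ScheduleKind.M1 = OMPC_SCHEDULE_MODIFIER_unknown;
  ScheduleKind.M2 = OMPC_SCHEDULE_MODIFIER_unknown;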
Describes how types, statements, expressions, and declarations should be printed.