//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
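
// Illustrative usage sketch (not from the upstream file; directive kind and
// statement are placeholders): callers wrap emission of an inlined construct
// in this RAII object so that CapturedStmtInfo and, when NoInheritance is set,
// the lambda/block capture state are restored when the scope ends.
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CGF.EmitStmt(Body); // emitted with the inlined-region CapturedStmtInfo
//   } // previous CapturedStmtInfo restored here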

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
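
// Illustrative note (not from the upstream file): these are bitmask values
// that get OR-combined into the ident_t 'flags' field described below. For
// example, an implicit barrier at the end of a worksharing loop would carry
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR   // 0x02 | 0x40 == 0x42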

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
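
// Illustrative example (not from the upstream file; names are made up): for a
// construct spanning lines 4-9 of test.c inside function 'foo', the psource
// field would take the semi-colon separated form described above, roughly
//   ";test.c;foo;4;9;;"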

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
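
// Illustrative note (not from the upstream file): each 'ordered' enumerator
// mirrors its unordered counterpart offset by 32 (OMP_sch_static == 34,
// OMP_ord_static == 66), and the OpenMP 4.5 modifiers occupy the high bits,
// so schedule(nonmonotonic: dynamic) would be encoded as
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic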

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
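
// Illustrative note (not from the upstream file): the IR emitted by the loop
// above has the shape
//   entry: %isempty = icmp eq %dest.begin, %dest.end
//          br %isempty, label %omp.arrayinit.done, label %omp.arrayinit.body
//   body:  PHI over the current destination element, initialize it,
//          advance by one, branch back until %dest.end is reached
//   done:
// with a second PHI walking the source array in lock-step when a user-defined
// reduction (DRD) supplies the initializer.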

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  Config.setDefaultTargetAS(
      CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));

  OMPBuilder.setConfig(Config);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
                                     CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
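
// Illustrative example (not from the upstream file; names are made up): for a
// directive at line 10, column 5 of test.c inside function 'foo', the routine
// above produces
//   ";test.c;foo;10;5;;"
// which becomes the psource field of the emitted ident_t.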

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
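
// Illustrative note (not from the upstream file): on the non-OpenMPIRBuilder
// path the call emitted above looks roughly like
//   %tid = call i32 @__kmpc_global_thread_num(ptr @<ident>)
// placed at the service insert point near the function entry, so all later
// uses of the thread id within the same function share the cached value.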

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
    for (const auto *D : I->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(I);
  }
  if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
    break;
  }
}

static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
    break;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    if (!CGM.getFileSystem()->exists(PLoc.getFilename()))
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack,
                                             *CGM.getFileSystem(), ParentName);
}
1554
1555ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1556 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1557
1558 auto LinkageForVariable = [&VD, this]() {
1559 return CGM.getLLVMLinkageVarDefinition(VD);
1560 };
1561
1562 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1563
1564 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1565 CGM.getContext().getPointerType(VD->getType()));
1566 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1567 convertCaptureClause(VD), convertDeviceClause(VD),
1568 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1569 VD->isExternallyVisible(),
1570 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1571 VD->getCanonicalDecl()->getBeginLoc()),
1572 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1573 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1574 LinkageForVariable);
1575
1576 if (!addr)
1577 return ConstantAddress::invalid();
1578 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1579}
1580
1581llvm::Constant *
1582CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1583 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1584 !CGM.getContext().getTargetInfo().isTLSSupported());
1585 // Lookup the entry, lazily creating it if necessary.
1586 std::string Suffix = getName({"cache", ""});
1587 return OMPBuilder.getOrCreateInternalVariable(
1588 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1589}
1590
1591Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1592 const VarDecl *VD,
1593 Address VDAddr,
1594 SourceLocation Loc) {
1595 if (CGM.getLangOpts().OpenMPUseTLS &&
1596 CGM.getContext().getTargetInfo().isTLSSupported())
1597 return VDAddr;
1598
1599 llvm::Type *VarTy = VDAddr.getElementType();
1600 llvm::Value *Args[] = {
1601 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1602 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1603 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1604 getOrCreateThreadPrivateCache(VD)};
1605 return Address(
1606 CGF.EmitRuntimeCall(
1607 OMPBuilder.getOrCreateRuntimeFunction(
1608 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1609 Args),
1610 CGF.Int8Ty, VDAddr.getAlignment());
1611}
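// When TLS cannot be used, the call above amounts to (sketch, C-like
// pseudocode mirroring the runtime entry point):
//
//   void *addr = __kmpc_threadprivate_cached(&loc, gtid, (void *)&var,
//                                            sizeof(var), &var_cache);
//
// so each thread lazily obtains its own copy, cached per variable.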
1612
1613void CGOpenMPRuntime::emitThreadPrivateVarInit(
1614 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1615 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1616 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1617 // library.
1618 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1619 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1620 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1621 OMPLoc);
1622 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1623 // to register constructor/destructor for variable.
1624 llvm::Value *Args[] = {
1625 OMPLoc,
1626 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1627 Ctor, CopyCtor, Dtor};
1628 CGF.EmitRuntimeCall(
1629 OMPBuilder.getOrCreateRuntimeFunction(
1630 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1631 Args);
1632}
1633
1634llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1635 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1636 bool PerformInit, CodeGenFunction *CGF) {
1637 if (CGM.getLangOpts().OpenMPUseTLS &&
1638 CGM.getContext().getTargetInfo().isTLSSupported())
1639 return nullptr;
1640
1641 VD = VD->getDefinition(CGM.getContext());
1642 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1643 QualType ASTTy = VD->getType();
1644
1645 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1646 const Expr *Init = VD->getAnyInitializer();
1647 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1648 // Generate function that re-emits the declaration's initializer into the
1649 // threadprivate copy of the variable VD
1650 CodeGenFunction CtorCGF(CGM);
1651 FunctionArgList Args;
1652 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1653 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1654 ImplicitParamKind::Other);
1655 Args.push_back(&Dst);
1656
1657 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1658 CGM.getContext().VoidPtrTy, Args);
1659 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1660 std::string Name = getName({"__kmpc_global_ctor_", ""});
1661 llvm::Function *Fn =
1662 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1663 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1664 Args, Loc, Loc);
1665 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1666 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1667 CGM.getContext().VoidPtrTy, Dst.getLocation());
1668 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1669 VDAddr.getAlignment());
1670 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1671 /*IsInitializer=*/true);
1672 ArgVal = CtorCGF.EmitLoadOfScalar(
1673 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1674 CGM.getContext().VoidPtrTy, Dst.getLocation());
1675 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1676 CtorCGF.FinishFunction();
1677 Ctor = Fn;
1678 }
1679 if (VD->getType().isDestructedType() != QualType::DK_none) {
1680 // Generate function that emits destructor call for the threadprivate copy
1681 // of the variable VD
1682 CodeGenFunction DtorCGF(CGM);
1683 FunctionArgList Args;
1684 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1685 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1686 ImplicitParamKind::Other);
1687 Args.push_back(&Dst);
1688
1689 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1690 CGM.getContext().VoidTy, Args);
1691 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1692 std::string Name = getName({"__kmpc_global_dtor_", ""});
1693 llvm::Function *Fn =
1694 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1695 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1696 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1697 Loc, Loc);
1698 // Create a scope with an artificial location for the body of this function.
1699 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1700 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1701 DtorCGF.GetAddrOfLocalVar(&Dst),
1702 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1703 DtorCGF.emitDestroy(
1704 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1705 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1706 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1707 DtorCGF.FinishFunction();
1708 Dtor = Fn;
1709 }
1710 // Do not emit init function if it is not required.
1711 if (!Ctor && !Dtor)
1712 return nullptr;
1713
1714 // Copying constructor for the threadprivate variable.
1715 // Must be NULL - reserved by the runtime, which currently requires that
1716 // this parameter always be NULL; otherwise it fires an assertion.
1717 CopyCtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1718 if (Ctor == nullptr) {
1719 Ctor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1720 }
1721 if (Dtor == nullptr) {
1722 Dtor = llvm::Constant::getNullValue(CGM.DefaultPtrTy);
1723 }
1724 if (!CGF) {
1725 auto *InitFunctionTy =
1726 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1727 std::string Name = getName({"__omp_threadprivate_init_", ""});
1728 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1729 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1730 CodeGenFunction InitCGF(CGM);
1731 FunctionArgList ArgList;
1732 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1733 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1734 Loc, Loc);
1735 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1736 InitCGF.FinishFunction();
1737 return InitFunction;
1738 }
1739 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1740 }
1741 return nullptr;
1742}
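// Rough shape of the output for a C++ threadprivate variable with a dynamic
// initializer and a destructor (sketch; real symbols carry internal names):
//
//   void *__kmpc_global_ctor_(void *p) { new (p) T(init); return p; }
//   void __kmpc_global_dtor_(void *p)  { ((T *)p)->~T(); }
//   void __omp_threadprivate_init_() {
//     __kmpc_global_thread_num(&loc); // initialize the runtime
//     __kmpc_threadprivate_register(&loc, &var, ctor, /*cctor=*/0, dtor);
//   }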
1743
1744void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1745 llvm::GlobalValue *GV) {
1746 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1747 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1748
1749 // We only need to handle active 'indirect' declare target functions.
1750 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1751 return;
1752
1753 // Get a mangled name to store the new device global in.
1754 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1755 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1756 SmallString<128> Name;
1757 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1758
1759 // We need to generate a new global to hold the address of the indirectly
1760 // called device function. Doing this allows us to keep the visibility and
1761 // linkage of the associated function unchanged while allowing the runtime to
1762 // access its value.
1763 llvm::GlobalValue *Addr = GV;
1764 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1765 llvm::PointerType *FnPtrTy = llvm::PointerType::get(
1766 CGM.getLLVMContext(),
1767 CGM.getModule().getDataLayout().getProgramAddressSpace());
1768 Addr = new llvm::GlobalVariable(
1769 CGM.getModule(), FnPtrTy,
1770 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1771 nullptr, llvm::GlobalValue::NotThreadLocal,
1772 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1773 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1774 }
1775
1776 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1777 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1778 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1779 llvm::GlobalValue::WeakODRLinkage);
1780}
1781
1782Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1783 QualType VarType,
1784 StringRef Name) {
1785 std::string Suffix = getName({"artificial", ""});
1786 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1787 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1788 VarLVType, Twine(Name).concat(Suffix).str());
1789 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1790 CGM.getTarget().isTLSSupported()) {
1791 GAddr->setThreadLocal(/*Val=*/true);
1792 return Address(GAddr, GAddr->getValueType(),
1793 CGM.getContext().getTypeAlignInChars(VarType));
1794 }
1795 std::string CacheSuffix = getName({"cache", ""});
1796 llvm::Value *Args[] = {
1797 emitUpdateLocation(CGF, SourceLocation()),
1798 getThreadID(CGF, SourceLocation()),
1799 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1800 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1801 /*isSigned=*/false),
1802 OMPBuilder.getOrCreateInternalVariable(
1803 CGM.VoidPtrPtrTy,
1804 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1805 return Address(
1806 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1807 CGF.EmitRuntimeCall(
1808 OMPBuilder.getOrCreateRuntimeFunction(
1809 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1810 Args),
1811 CGF.Builder.getPtrTy(0)),
1812 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1813}
1814
1815void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1816 const RegionCodeGenTy &ThenGen,
1817 const RegionCodeGenTy &ElseGen) {
1818 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1819
1820 // If the condition constant folds and can be elided, try to avoid emitting
1821 // the condition and the dead arm of the if/else.
1822 bool CondConstant;
1823 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1824 if (CondConstant)
1825 ThenGen(CGF);
1826 else
1827 ElseGen(CGF);
1828 return;
1829 }
1830
1831 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1832 // emit the conditional branch.
1833 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1834 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1835 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1836 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1837
1838 // Emit the 'then' code.
1839 CGF.EmitBlock(ThenBlock);
1840 ThenGen(CGF);
1841 CGF.EmitBranch(ContBlock);
1842 // Emit the 'else' code if present.
1843 // There is no need to emit line number for unconditional branch.
1844 (void)ApplyDebugLocation::CreateEmpty(CGF);
1845 CGF.EmitBlock(ElseBlock);
1846 ElseGen(CGF);
1847 // There is no need to emit line number for unconditional branch.
1848 (void)ApplyDebugLocation::CreateEmpty(CGF);
1849 CGF.EmitBranch(ContBlock);
1850 // Emit the continuation block for code after the if.
1851 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1852}
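// For a non-constant condition this yields the usual diamond (sketch):
//
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// omp_if.then:
//   ; ThenGen
//   br label %omp_if.end
// omp_if.else:
//   ; ElseGen
//   br label %omp_if.end
// omp_if.end: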
1853
1854void CGOpenMPRuntime::emitParallelCall(
1855 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1856 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1857 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1858 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1859 if (!CGF.HaveInsertPoint())
1860 return;
1861 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1862 auto &M = CGM.getModule();
1863 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1864 this](CodeGenFunction &CGF, PrePostActionTy &) {
1865 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1866 llvm::Value *Args[] = {
1867 RTLoc,
1868 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1869 OutlinedFn};
1870 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1871 RealArgs.append(std::begin(Args), std::end(Args));
1872 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1873
1874 llvm::FunctionCallee RTLFn =
1875 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1876 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1877 };
1878 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1879 this](CodeGenFunction &CGF, PrePostActionTy &) {
1880 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1881 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1882 // Build calls:
1883 // __kmpc_serialized_parallel(&Loc, GTid);
1884 llvm::Value *Args[] = {RTLoc, ThreadID};
1885 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1886 M, OMPRTL___kmpc_serialized_parallel),
1887 Args);
1888
1889 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1890 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1891 RawAddress ZeroAddrBound =
1892 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
1893 /*Name=*/".bound.zero.addr");
1894 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1895 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1896 // ThreadId for serialized parallels is 0.
1897 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1898 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1899 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1900
1901 // Ensure we do not inline the function. This is trivially true for the ones
1902 // passed to __kmpc_fork_call but the ones called in serialized regions
1903 // could be inlined. This is not perfect, but it is closer to the invariant
1904 // we want, namely, every data environment starts with a new function.
1905 // TODO: We should pass the if condition to the runtime function and do the
1906 // handling there. Much cleaner code.
1907 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1908 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1909 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1910
1911 // __kmpc_end_serialized_parallel(&Loc, GTid);
1912 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1913 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1914 M, OMPRTL___kmpc_end_serialized_parallel),
1915 EndArgs);
1916 };
1917 if (IfCond) {
1918 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1919 } else {
1920 RegionCodeGenTy ThenRCG(ThenGen);
1921 ThenRCG(CGF);
1922 }
1923}
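// Net effect for "#pragma omp parallel if(cond)" (sketch, C-like pseudocode):
//
//   if (cond) {
//     __kmpc_fork_call(&loc, n, outlined_fn, var1, ..., varn);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined_fn(&gtid, &zero_bound, var1, ..., varn);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }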
1924
1925// If we're inside an (outlined) parallel region, use the region info's
1926// thread-ID variable (it is passed as the first argument of the outlined
1927// function, "kmp_int32 *gtid"). Otherwise, if we're in a regular serial code
1928// region, get the thread ID by calling kmp_int32
1929// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1930// temporary and return the address of that temp.
1931Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1932 SourceLocation Loc) {
1933 if (auto *OMPRegionInfo =
1934 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1935 if (OMPRegionInfo->getThreadIDVariable())
1936 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1937
1938 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1939 QualType Int32Ty =
1940 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1941 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1942 CGF.EmitStoreOfScalar(ThreadID,
1943 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1944
1945 return ThreadIDTemp;
1946}
1947
1948llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1949 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1950 std::string Name = getName({Prefix, "var"});
1951 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1952}
1953
1954namespace {
1955/// Common pre(post)-action for different OpenMP constructs.
1956class CommonActionTy final : public PrePostActionTy {
1957 llvm::FunctionCallee EnterCallee;
1958 ArrayRef<llvm::Value *> EnterArgs;
1959 llvm::FunctionCallee ExitCallee;
1960 ArrayRef<llvm::Value *> ExitArgs;
1961 bool Conditional;
1962 llvm::BasicBlock *ContBlock = nullptr;
1963
1964public:
1965 CommonActionTy(llvm::FunctionCallee EnterCallee,
1966 ArrayRef<llvm::Value *> EnterArgs,
1967 llvm::FunctionCallee ExitCallee,
1968 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1969 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1970 ExitArgs(ExitArgs), Conditional(Conditional) {}
1971 void Enter(CodeGenFunction &CGF) override {
1972 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1973 if (Conditional) {
1974 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1975 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1976 ContBlock = CGF.createBasicBlock("omp_if.end");
1977 // Generate the branch (If-stmt)
1978 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1979 CGF.EmitBlock(ThenBlock);
1980 }
1981 }
1982 void Done(CodeGenFunction &CGF) {
1983 // Emit the rest of blocks/branches
1984 CGF.EmitBranch(ContBlock);
1985 CGF.EmitBlock(ContBlock, true);
1986 }
1987 void Exit(CodeGenFunction &CGF) override {
1988 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1989 }
1990};
1991} // anonymous namespace
1992
1993void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1994 StringRef CriticalName,
1995 const RegionCodeGenTy &CriticalOpGen,
1996 SourceLocation Loc, const Expr *Hint) {
1997 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1998 // CriticalOpGen();
1999 // __kmpc_end_critical(ident_t *, gtid, Lock);
2000 // Prepare arguments and build a call to __kmpc_critical
2001 if (!CGF.HaveInsertPoint())
2002 return;
2003 llvm::FunctionCallee RuntimeFcn = OMPBuilder.getOrCreateRuntimeFunction(
2004 CGM.getModule(),
2005 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical);
2006 llvm::Value *LockVar = getCriticalRegionLock(CriticalName);
2007 unsigned LockVarArgIdx = 2;
2008 if (cast<llvm::GlobalVariable>(LockVar)->getAddressSpace() !=
2009 RuntimeFcn.getFunctionType()
2010 ->getParamType(LockVarArgIdx)
2011 ->getPointerAddressSpace())
2012 LockVar = CGF.Builder.CreateAddrSpaceCast(
2013 LockVar, RuntimeFcn.getFunctionType()->getParamType(LockVarArgIdx));
2014 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2015 LockVar};
2016 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2017 std::end(Args));
2018 if (Hint) {
2019 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2020 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2021 }
2022 CommonActionTy Action(RuntimeFcn, EnterArgs,
2023 OMPBuilder.getOrCreateRuntimeFunction(
2024 CGM.getModule(), OMPRTL___kmpc_end_critical),
2025 Args);
2026 CriticalOpGen.setAction(Action);
2027 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2028}
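// For example, "#pragma omp critical (foo)" lowers to (sketch; the lock
// variable's exact name comes from getCriticalRegionLock):
//
//   __kmpc_critical(&loc, gtid, &.gomp_critical_user_foo.var);
//   // body
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_foo.var);
//
// and with a hint clause the entry call becomes __kmpc_critical_with_hint.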
2029
2030void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2031 const RegionCodeGenTy &MasterOpGen,
2032 SourceLocation Loc) {
2033 if (!CGF.HaveInsertPoint())
2034 return;
2035 // if(__kmpc_master(ident_t *, gtid)) {
2036 // MasterOpGen();
2037 // __kmpc_end_master(ident_t *, gtid);
2038 // }
2039 // Prepare arguments and build a call to __kmpc_master
2040 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2041 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2042 CGM.getModule(), OMPRTL___kmpc_master),
2043 Args,
2044 OMPBuilder.getOrCreateRuntimeFunction(
2045 CGM.getModule(), OMPRTL___kmpc_end_master),
2046 Args,
2047 /*Conditional=*/true);
2048 MasterOpGen.setAction(Action);
2049 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2050 Action.Done(CGF);
2051}
2052
2053void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2054 const RegionCodeGenTy &MaskedOpGen,
2055 SourceLocation Loc, const Expr *Filter) {
2056 if (!CGF.HaveInsertPoint())
2057 return;
2058 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2059 // MaskedOpGen();
2060 // __kmpc_end_masked(ident_t *, gtid);
2061 // }
2062 // Prepare arguments and build a call to __kmpc_masked
2063 llvm::Value *FilterVal = Filter
2064 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2065 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2066 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2067 FilterVal};
2068 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2069 getThreadID(CGF, Loc)};
2070 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2071 CGM.getModule(), OMPRTL___kmpc_masked),
2072 Args,
2073 OMPBuilder.getOrCreateRuntimeFunction(
2074 CGM.getModule(), OMPRTL___kmpc_end_masked),
2075 ArgsEnd,
2076 /*Conditional=*/true);
2077 MaskedOpGen.setAction(Action);
2078 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2079 Action.Done(CGF);
2080}
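// For example, "#pragma omp masked filter(2)" lowers to (sketch):
//
//   if (__kmpc_masked(&loc, gtid, 2)) {
//     // body
//     __kmpc_end_masked(&loc, gtid);
//   }
//
// Without a filter clause the filter value defaults to 0, the primary
// thread, matching the master construct above.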
2081
2082void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2083 SourceLocation Loc) {
2084 if (!CGF.HaveInsertPoint())
2085 return;
2086 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2087 OMPBuilder.createTaskyield(CGF.Builder);
2088 } else {
2089 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2090 llvm::Value *Args[] = {
2091 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2092 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2093 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2094 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2095 Args);
2096 }
2097
2098 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2099 Region->emitUntiedSwitch(CGF);
2100}
2101
2102void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2103 const RegionCodeGenTy &TaskgroupOpGen,
2104 SourceLocation Loc) {
2105 if (!CGF.HaveInsertPoint())
2106 return;
2107 // __kmpc_taskgroup(ident_t *, gtid);
2108 // TaskgroupOpGen();
2109 // __kmpc_end_taskgroup(ident_t *, gtid);
2110 // Prepare arguments and build a call to __kmpc_taskgroup
2111 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2112 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2113 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2114 Args,
2115 OMPBuilder.getOrCreateRuntimeFunction(
2116 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2117 Args);
2118 TaskgroupOpGen.setAction(Action);
2119 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2120}
2121
2122/// Given an array of pointers to variables, project the address of a
2123/// given variable.
2124static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2125 unsigned Index, const VarDecl *Var) {
2126 // Pull out the pointer to the variable.
2127 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2128 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2129
2130 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2131 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2132}
2133
2134static llvm::Value *emitCopyprivateCopyFunction(
2135 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2136 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2137 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2138 SourceLocation Loc) {
2139 ASTContext &C = CGM.getContext();
2140 // void copy_func(void *LHSArg, void *RHSArg);
2141 FunctionArgList Args;
2142 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2143 ImplicitParamKind::Other);
2144 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2145 ImplicitParamKind::Other);
2146 Args.push_back(&LHSArg);
2147 Args.push_back(&RHSArg);
2148 const auto &CGFI =
2149 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2150 std::string Name =
2151 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2152 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2153 llvm::GlobalValue::InternalLinkage, Name,
2154 &CGM.getModule());
2155 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2156 Fn->setDoesNotRecurse();
2157 CodeGenFunction CGF(CGM);
2158 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2159 // Dest = (void*[n])(LHSArg);
2160 // Src = (void*[n])(RHSArg);
2161 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2162 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2163 CGF.Builder.getPtrTy(0)),
2164 ArgsElemType, CGF.getPointerAlign());
2165 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2166 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2167 CGF.Builder.getPtrTy(0)),
2168 ArgsElemType, CGF.getPointerAlign());
2169 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2170 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2171 // ...
2172 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2173 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2174 const auto *DestVar =
2175 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2176 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2177
2178 const auto *SrcVar =
2179 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2180 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2181
2182 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2183 QualType Type = VD->getType();
2184 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2185 }
2186 CGF.FinishFunction();
2187 return Fn;
2188}
2189
2190void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2191 const RegionCodeGenTy &SingleOpGen,
2192 SourceLocation Loc,
2193 ArrayRef<const Expr *> CopyprivateVars,
2194 ArrayRef<const Expr *> SrcExprs,
2195 ArrayRef<const Expr *> DstExprs,
2196 ArrayRef<const Expr *> AssignmentOps) {
2197 if (!CGF.HaveInsertPoint())
2198 return;
2199 assert(CopyprivateVars.size() == SrcExprs.size() &&
2200 CopyprivateVars.size() == DstExprs.size() &&
2201 CopyprivateVars.size() == AssignmentOps.size());
2202 ASTContext &C = CGM.getContext();
2203 // int32 did_it = 0;
2204 // if(__kmpc_single(ident_t *, gtid)) {
2205 // SingleOpGen();
2206 // __kmpc_end_single(ident_t *, gtid);
2207 // did_it = 1;
2208 // }
2209 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2210 // <copy_func>, did_it);
2211
2212 Address DidIt = Address::invalid();
2213 if (!CopyprivateVars.empty()) {
2214 // int32 did_it = 0;
2215 QualType KmpInt32Ty =
2216 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2217 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2218 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2219 }
2220 // Prepare arguments and build a call to __kmpc_single
2221 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2222 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2223 CGM.getModule(), OMPRTL___kmpc_single),
2224 Args,
2225 OMPBuilder.getOrCreateRuntimeFunction(
2226 CGM.getModule(), OMPRTL___kmpc_end_single),
2227 Args,
2228 /*Conditional=*/true);
2229 SingleOpGen.setAction(Action);
2230 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2231 if (DidIt.isValid()) {
2232 // did_it = 1;
2233 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2234 }
2235 Action.Done(CGF);
2236 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2237 // <copy_func>, did_it);
2238 if (DidIt.isValid()) {
2239 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2240 QualType CopyprivateArrayTy = C.getConstantArrayType(
2241 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2242 /*IndexTypeQuals=*/0);
2243 // Create a list of all private variables for copyprivate.
2244 Address CopyprivateList =
2245 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2246 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2247 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2248 CGF.Builder.CreateStore(
2249 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2250 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2251 CGF.VoidPtrTy),
2252 Elem);
2253 }
2254 // Build a function that copies private values from the single region to
2255 // all other threads in the corresponding parallel region.
2256 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2257 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2258 SrcExprs, DstExprs, AssignmentOps, Loc);
2259 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2260 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2261 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2262 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2263 llvm::Value *Args[] = {
2264 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2265 getThreadID(CGF, Loc), // i32 <gtid>
2266 BufSize, // size_t <buf_size>
2267 CL.emitRawPointer(CGF), // void *<copyprivate list>
2268 CpyFn, // void (*) (void *, void *) <copy_func>
2269 DidItVal // i32 did_it
2270 };
2271 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2272 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2273 Args);
2274 }
2275}
2276
2277void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2278 const RegionCodeGenTy &OrderedOpGen,
2279 SourceLocation Loc, bool IsThreads) {
2280 if (!CGF.HaveInsertPoint())
2281 return;
2282 // __kmpc_ordered(ident_t *, gtid);
2283 // OrderedOpGen();
2284 // __kmpc_end_ordered(ident_t *, gtid);
2285 // Prepare arguments and build a call to __kmpc_ordered
2286 if (IsThreads) {
2287 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2288 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2289 CGM.getModule(), OMPRTL___kmpc_ordered),
2290 Args,
2291 OMPBuilder.getOrCreateRuntimeFunction(
2292 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2293 Args);
2294 OrderedOpGen.setAction(Action);
2295 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2296 return;
2297 }
2298 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2299}
2300
2301static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2302 unsigned Flags;
2303 if (Kind == OMPD_for)
2304 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2305 else if (Kind == OMPD_sections)
2306 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2307 else if (Kind == OMPD_single)
2308 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2309 else if (Kind == OMPD_barrier)
2310 Flags = OMP_IDENT_BARRIER_EXPL;
2311 else
2312 Flags = OMP_IDENT_BARRIER_IMPL;
2313 return Flags;
2314}
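// For example, the implicit barrier at the end of "#pragma omp for" carries
// OMP_IDENT_BARRIER_IMPL_FOR in the ident_t flags, distinguishing it from an
// explicit "#pragma omp barrier" (OMP_IDENT_BARRIER_EXPL) for the runtime.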
2315
2316void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2317 CodeGenFunction &CGF, const OMPLoopDirective &S,
2318 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2319 // Check if the loop directive is actually a doacross loop directive. In this
2320 // case, choose a static schedule with chunk size 1.
2321 if (llvm::any_of(
2322 S.getClausesOfKind<OMPOrderedClause>(),
2323 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2324 ScheduleKind = OMPC_SCHEDULE_static;
2325 // Chunk size is 1 in this case.
2326 llvm::APInt ChunkSize(32, 1);
2327 ChunkExpr = IntegerLiteral::Create(
2328 CGF.getContext(), ChunkSize,
2329 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2330 SourceLocation());
2331 }
2332}
2333
2334void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2335 OpenMPDirectiveKind Kind, bool EmitChecks,
2336 bool ForceSimpleCall) {
2337 // Check if we should use the OMPBuilder
2338 auto *OMPRegionInfo =
2339 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2340 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2341 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2342 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2343 EmitChecks));
2344 CGF.Builder.restoreIP(AfterIP);
2345 return;
2346 }
2347
2348 if (!CGF.HaveInsertPoint())
2349 return;
2350 // Build call __kmpc_cancel_barrier(loc, thread_id) or
2351 // __kmpc_barrier(loc, thread_id), depending on cancellation support.
2352 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2355 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2356 getThreadID(CGF, Loc)};
2357 if (OMPRegionInfo) {
2358 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2359 llvm::Value *Result = CGF.EmitRuntimeCall(
2360 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2361 OMPRTL___kmpc_cancel_barrier),
2362 Args);
2363 if (EmitChecks) {
2364 // if (__kmpc_cancel_barrier()) {
2365 // exit from construct;
2366 // }
2367 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2368 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2369 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2370 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2371 CGF.EmitBlock(ExitBB);
2372 // exit from construct;
2373 CodeGenFunction::JumpDest CancelDestination =
2374 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2375 CGF.EmitBranchThroughCleanup(CancelDestination);
2376 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2377 }
2378 return;
2379 }
2380 }
2381 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2382 CGM.getModule(), OMPRTL___kmpc_barrier),
2383 Args);
2384}
2385
2386void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2387 Expr *ME, bool IsFatal) {
2388 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2389 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2390 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2391 // *message)
2392 llvm::Value *Args[] = {
2393 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2394 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2395 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2396 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2397 CGM.getModule(), OMPRTL___kmpc_error),
2398 Args);
2399}
2400
2401/// Map the OpenMP loop schedule to the runtime enumeration.
2402static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2403 bool Chunked, bool Ordered) {
2404 switch (ScheduleKind) {
2405 case OMPC_SCHEDULE_static:
2406 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2407 : (Ordered ? OMP_ord_static : OMP_sch_static);
2408 case OMPC_SCHEDULE_dynamic:
2409 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2410 case OMPC_SCHEDULE_guided:
2411 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2412 case OMPC_SCHEDULE_runtime:
2413 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2414 case OMPC_SCHEDULE_auto:
2415 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2416 case OMPC_SCHEDULE_unknown:
2417 assert(!Chunked && "chunk was specified but schedule kind not known");
2418 return Ordered ? OMP_ord_static : OMP_sch_static;
2419 }
2420 llvm_unreachable("Unexpected runtime schedule");
2421}
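// For example, "schedule(static)" maps to OMP_sch_static, "schedule(static,
// c)" to OMP_sch_static_chunked, and "schedule(dynamic[, c])" to
// OMP_sch_dynamic_chunked; with an ordered clause the corresponding
// OMP_ord_* values are used instead.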
2422
2423/// Map the OpenMP distribute schedule to the runtime enumeration.
2424static OpenMPSchedType
2425getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2426 // only static is allowed for dist_schedule
2427 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2428}
2429
2430bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2431 bool Chunked) const {
2432 OpenMPSchedType Schedule =
2433 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2434 return Schedule == OMP_sch_static;
2435}
2436
2437bool CGOpenMPRuntime::isStaticNonchunked(
2438 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2439 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2440 return Schedule == OMP_dist_sch_static;
2441}
2442
2443bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2444 bool Chunked) const {
2445 OpenMPSchedType Schedule =
2446 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2447 return Schedule == OMP_sch_static_chunked;
2448}
2449
2450bool CGOpenMPRuntime::isStaticChunked(
2451 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2452 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2453 return Schedule == OMP_dist_sch_static_chunked;
2454}
2455
2456bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2457 OpenMPSchedType Schedule =
2458 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2459 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2460 return Schedule != OMP_sch_static;
2461}
2462
2463static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2464 OpenMPScheduleClauseModifier M1,
2465 OpenMPScheduleClauseModifier M2) {
2466 int Modifier = 0;
2467 switch (M1) {
2468 case OMPC_SCHEDULE_MODIFIER_monotonic:
2469 Modifier = OMP_sch_modifier_monotonic;
2470 break;
2471 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2472 Modifier = OMP_sch_modifier_nonmonotonic;
2473 break;
2474 case OMPC_SCHEDULE_MODIFIER_simd:
2475 if (Schedule == OMP_sch_static_chunked)
2476 Schedule = OMP_sch_static_balanced_chunked;
2477 break;
2478 case OMPC_SCHEDULE_MODIFIER_last:
2479 case OMPC_SCHEDULE_MODIFIER_unknown:
2480 break;
2481 }
2482 switch (M2) {
2483 case OMPC_SCHEDULE_MODIFIER_monotonic:
2484 Modifier = OMP_sch_modifier_monotonic;
2485 break;
2486 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2487 Modifier = OMP_sch_modifier_nonmonotonic;
2488 break;
2489 case OMPC_SCHEDULE_MODIFIER_simd:
2490 if (Schedule == OMP_sch_static_chunked)
2491 Schedule = OMP_sch_static_balanced_chunked;
2492 break;
2493 case OMPC_SCHEDULE_MODIFIER_last:
2494 case OMPC_SCHEDULE_MODIFIER_unknown:
2495 break;
2496 }
2497 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2498 // If the static schedule kind is specified or if the ordered clause is
2499 // specified, and if the nonmonotonic modifier is not specified, the effect is
2500 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2501 // modifier is specified, the effect is as if the nonmonotonic modifier is
2502 // specified.
2503 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2504 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2505 Schedule == OMP_sch_static_balanced_chunked ||
2506 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2507 Schedule == OMP_dist_sch_static_chunked ||
2508 Schedule == OMP_dist_sch_static))
2509 Modifier = OMP_sch_modifier_nonmonotonic;
2510 }
2511 return Schedule | Modifier;
2512}
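// For example, under OpenMP >= 5.0 a plain "schedule(dynamic)" is encoded as
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, while static and
// ordered schedules are left unmodified (monotonic by default), per the rule
// quoted above.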
2513
2514void CGOpenMPRuntime::emitForDispatchInit(
2515 CodeGenFunction &CGF, SourceLocation Loc,
2516 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2517 bool Ordered, const DispatchRTInput &DispatchValues) {
2518 if (!CGF.HaveInsertPoint())
2519 return;
2520 OpenMPSchedType Schedule = getRuntimeSchedule(
2521 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2522 assert(Ordered ||
2523 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2524 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2525 Schedule != OMP_sch_static_balanced_chunked));
2526 // Call __kmpc_dispatch_init(
2527 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2528 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2529 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2530
2531 // If the Chunk was not specified in the clause - use default value 1.
2532 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2533 : CGF.Builder.getIntN(IVSize, 1);
2534 llvm::Value *Args[] = {
2535 emitUpdateLocation(CGF, Loc),
2536 getThreadID(CGF, Loc),
2537 CGF.Builder.getInt32(addMonoNonMonoModifier(
2538 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2539 DispatchValues.LB, // Lower
2540 DispatchValues.UB, // Upper
2541 CGF.Builder.getIntN(IVSize, 1), // Stride
2542 Chunk // Chunk
2543 };
2544 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2545 Args);
2546}
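// For instance, "#pragma omp for schedule(dynamic, 4)" with a 32-bit signed
// induction variable ends up as (sketch):
//
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, /*stride=*/1,
//                          /*chunk=*/4);
//
// where sched encodes OMP_sch_dynamic_chunked plus any modifier bits.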
2547
2548void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2549 SourceLocation Loc) {
2550 if (!CGF.HaveInsertPoint())
2551 return;
2552 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2553 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2554 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2555}
2556
2557static void emitForStaticInitCall(
2558 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2559 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2560 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2561 const CGOpenMPRuntime::StaticRTInput &Values) {
2562 if (!CGF.HaveInsertPoint())
2563 return;
2564
2565 assert(!Values.Ordered);
2566 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2567 Schedule == OMP_sch_static_balanced_chunked ||
2568 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2569 Schedule == OMP_dist_sch_static ||
2570 Schedule == OMP_dist_sch_static_chunked);
2571
2572 // Call __kmpc_for_static_init(
2573 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2574 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2575 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2576 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2577 llvm::Value *Chunk = Values.Chunk;
2578 if (Chunk == nullptr) {
2579 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2580 Schedule == OMP_dist_sch_static) &&
2581 "expected static non-chunked schedule");
2582 // If the Chunk was not specified in the clause - use default value 1.
2583 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2584 } else {
2585 assert((Schedule == OMP_sch_static_chunked ||
2586 Schedule == OMP_sch_static_balanced_chunked ||
2587 Schedule == OMP_ord_static_chunked ||
2588 Schedule == OMP_dist_sch_static_chunked) &&
2589 "expected static chunked schedule");
2590 }
2591 llvm::Value *Args[] = {
2592 UpdateLocation,
2593 ThreadId,
2594 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2595 M2)), // Schedule type
2596 Values.IL.emitRawPointer(CGF), // &isLastIter
2597 Values.LB.emitRawPointer(CGF), // &LB
2598 Values.UB.emitRawPointer(CGF), // &UB
2599 Values.ST.emitRawPointer(CGF), // &Stride
2600 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2601 Chunk // Chunk
2602 };
2603 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2604}
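// For a 32-bit signed induction variable and a plain static schedule this
// produces, for example (sketch):
//
//   __kmpc_for_static_init_4(&loc, gtid, OMP_sch_static, &last, &lb, &ub,
//                            &stride, /*incr=*/1, /*chunk=*/1);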
2605
2606void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2607 SourceLocation Loc,
2608 OpenMPDirectiveKind DKind,
2609 const OpenMPScheduleTy &ScheduleKind,
2610 const StaticRTInput &Values) {
2611 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2612 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2613 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2614 "Expected loop-based or sections-based directive.");
2615 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2616 isOpenMPLoopDirective(DKind)
2617 ? OMP_IDENT_WORK_LOOP
2618 : OMP_IDENT_WORK_SECTIONS);
2619 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2620 llvm::FunctionCallee StaticInitFunction =
2621 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2622 false);
2623 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2624 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2625 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2626}
2627
2628void CGOpenMPRuntime::emitDistributeStaticInit(
2629 CodeGenFunction &CGF, SourceLocation Loc,
2630 OpenMPDistScheduleClauseKind SchedKind,
2631 const CGOpenMPRuntime::StaticRTInput &Values) {
2632 OpenMPSchedType ScheduleNum =
2633 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2634 llvm::Value *UpdatedLocation =
2635 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2636 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2637 llvm::FunctionCallee StaticInitFunction;
2638 bool isGPUDistribute =
2639 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2640 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2641 Values.IVSize, Values.IVSigned, isGPUDistribute);
2642
2643 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2644 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2645 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2646}
2647
2648void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2649 SourceLocation Loc,
2650 OpenMPDirectiveKind DKind) {
2651 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2652 DKind == OMPD_sections) &&
2653 "Expected distribute, for, or sections directive kind");
2654 if (!CGF.HaveInsertPoint())
2655 return;
2656 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2657 llvm::Value *Args[] = {
2658 emitUpdateLocation(CGF, Loc,
2659 isOpenMPDistributeDirective(DKind) ||
2660 (DKind == OMPD_target_teams_loop)
2661 ? OMP_IDENT_WORK_DISTRIBUTE
2662 : isOpenMPLoopDirective(DKind)
2663 ? OMP_IDENT_WORK_LOOP
2664 : OMP_IDENT_WORK_SECTIONS),
2665 getThreadID(CGF, Loc)};
2666 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2667 if (isOpenMPDistributeDirective(DKind) &&
2668 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2669 CGF.EmitRuntimeCall(
2670 OMPBuilder.getOrCreateRuntimeFunction(
2671 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2672 Args);
2673 else
2674 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2675 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2676 Args);
2677}
2678
2679void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2680 SourceLocation Loc,
2681 unsigned IVSize,
2682 bool IVSigned) {
2683 if (!CGF.HaveInsertPoint())
2684 return;
2685 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2686 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2687 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2688 Args);
2689}
2690
2691llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2692 SourceLocation Loc, unsigned IVSize,
2693 bool IVSigned, Address IL,
2694 Address LB, Address UB,
2695 Address ST) {
2696 // Call __kmpc_dispatch_next(
2697 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2698 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2699 // kmp_int[32|64] *p_stride);
2700 llvm::Value *Args[] = {
2701 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2702 IL.emitRawPointer(CGF), // &isLastIter
2703 LB.emitRawPointer(CGF), // &Lower
2704 UB.emitRawPointer(CGF), // &Upper
2705 ST.emitRawPointer(CGF) // &Stride
2706 };
2707 llvm::Value *Call = CGF.EmitRuntimeCall(
2708 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2709 return CGF.EmitScalarConversion(
2710 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2711 CGF.getContext().BoolTy, Loc);
2712}
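// Callers typically drive a dispatching loop of the form (sketch):
//
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
//     for (iv = lb; iv <= ub; iv += st)
//       ; // loop body
//   }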
2713
2714llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2715 const Expr *Message,
2716 SourceLocation Loc) {
2717 if (!Message)
2718 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2719 return CGF.EmitScalarExpr(Message);
2720}
2721
2722llvm::Value *
2723CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity,
2724 SourceLocation Loc) {
2725 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2726 // as if sev-level is fatal."
2727 return llvm::ConstantInt::get(CGM.Int32Ty,
2728 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2729}
2730
2731void CGOpenMPRuntime::emitNumThreadsClause(
2732 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2733 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
2734 SourceLocation SeverityLoc, const Expr *Message,
2735 SourceLocation MessageLoc) {
2736 if (!CGF.HaveInsertPoint())
2737 return;
2738 llvm::SmallVector<llvm::Value *, 4> Args(
2739 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2740 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2741 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2742 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2743 // message) if the strict modifier is used.
2744 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2745 if (Modifier == OMPC_NUMTHREADS_strict) {
2746 FnID = OMPRTL___kmpc_push_num_threads_strict;
2747 Args.push_back(emitSeverityClause(Severity, SeverityLoc));
2748 Args.push_back(emitMessageClause(CGF, Message, MessageLoc));
2749 }
2750 CGF.EmitRuntimeCall(
2751 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2752}
2753
2754void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2755 ProcBindKind ProcBind,
2756 SourceLocation Loc) {
2757 if (!CGF.HaveInsertPoint())
2758 return;
2759 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2760 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2761 llvm::Value *Args[] = {
2762 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2763 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2764 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2765 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2766 Args);
2767}
2768
2769void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2770 SourceLocation Loc, llvm::AtomicOrdering AO) {
2771 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2772 OMPBuilder.createFlush(CGF.Builder);
2773 } else {
2774 if (!CGF.HaveInsertPoint())
2775 return;
2776 // Build call void __kmpc_flush(ident_t *loc)
2777 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2778 CGM.getModule(), OMPRTL___kmpc_flush),
2779 emitUpdateLocation(CGF, Loc));
2780 }
2781}
2782
2783namespace {
2784/// Indexes of fields for type kmp_task_t.
2785enum KmpTaskTFields {
2786 /// List of shared variables.
2787 KmpTaskTShareds,
2788 /// Task routine.
2789 KmpTaskTRoutine,
2790 /// Partition id for the untied tasks.
2791 KmpTaskTPartId,
2792 /// Function with call of destructors for private variables.
2793 Data1,
2794 /// Task priority.
2795 Data2,
2796 /// (Taskloops only) Lower bound.
2797 KmpTaskTLowerBound,
2798 /// (Taskloops only) Upper bound.
2799 KmpTaskTUpperBound,
2800 /// (Taskloops only) Stride.
2801 KmpTaskTStride,
2802 /// (Taskloops only) Is last iteration flag.
2803 KmpTaskTLastIter,
2804 /// (Taskloops only) Reduction data.
2805 KmpTaskTReductions,
2806};
2807} // anonymous namespace
2808
2809void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2810 // If we are in simd mode or there are no entries, we don't need to do
2811 // anything.
2812 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2813 return;
2814
2815 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2816 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2817 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2818 SourceLocation Loc;
2819 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2820 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2821 E = CGM.getContext().getSourceManager().fileinfo_end();
2822 I != E; ++I) {
2823 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2824 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2825 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2826 I->getFirst(), EntryInfo.Line, 1);
2827 break;
2828 }
2829 }
2830 }
2831 switch (Kind) {
2832 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2833 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2834 DiagnosticsEngine::Error, "Offloading entry for target region in "
2835 "%0 is incorrect: either the "
2836 "address or the ID is invalid.");
2837 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2838 } break;
2839 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2840 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2841 DiagnosticsEngine::Error, "Offloading entry for declare target "
2842 "variable %0 is incorrect: the "
2843 "address is invalid.");
2844 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2845 } break;
2846 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2847 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2848 DiagnosticsEngine::Error,
2849 "Offloading entry for declare target variable is incorrect: the "
2850 "address is invalid.");
2851 CGM.getDiags().Report(DiagID);
2852 } break;
2853 }
2854 };
2855
2856 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2857}
2858
2859void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2860 if (!KmpRoutineEntryPtrTy) {
2861 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2862 ASTContext &C = CGM.getContext();
2863 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2864 FunctionProtoType::ExtProtoInfo EPI;
2865 KmpRoutineEntryPtrQTy = C.getPointerType(
2866 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2867 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2868 }
2869}
2870
2871namespace {
2872struct PrivateHelpersTy {
2873 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2874 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2875 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2876 PrivateElemInit(PrivateElemInit) {}
2877 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2878 const Expr *OriginalRef = nullptr;
2879 const VarDecl *Original = nullptr;
2880 const VarDecl *PrivateCopy = nullptr;
2881 const VarDecl *PrivateElemInit = nullptr;
2882 bool isLocalPrivate() const {
2883 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2884 }
2885};
2886typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2887} // anonymous namespace
2888
2889static bool isAllocatableDecl(const VarDecl *VD) {
2890 const VarDecl *CVD = VD->getCanonicalDecl();
2891 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2892 return false;
2893 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2894 // Use the default allocation.
2895 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2896 !AA->getAllocator());
2897}
2898
2899static RecordDecl *
2900createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2901 if (!Privates.empty()) {
2902 ASTContext &C = CGM.getContext();
2903 // Build struct .kmp_privates_t. {
2904 // /* private vars */
2905 // };
2906 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2907 RD->startDefinition();
2908 for (const auto &Pair : Privates) {
2909 const VarDecl *VD = Pair.second.Original;
2910 QualType Type = VD->getType().getNonReferenceType();
2911 // If the private variable is a local variable with lvalue ref type,
2912 // allocate the pointer instead of the pointee type.
2913 if (Pair.second.isLocalPrivate()) {
2914 if (VD->getType()->isLValueReferenceType())
2915 Type = C.getPointerType(Type);
2916 if (isAllocatableDecl(VD))
2917 Type = C.getPointerType(Type);
2918 }
2919 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2920 if (VD->hasAttrs()) {
2921 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2922 E(VD->getAttrs().end());
2923 I != E; ++I)
2924 FD->addAttr(*I);
2925 }
2926 }
2927 RD->completeDefinition();
2928 return RD;
2929 }
2930 return nullptr;
2931}
2932
2933static RecordDecl *
2934createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2935 QualType KmpInt32Ty,
2936 QualType KmpRoutineEntryPointerQTy) {
2937 ASTContext &C = CGM.getContext();
2938 // Build struct kmp_task_t {
2939 // void * shareds;
2940 // kmp_routine_entry_t routine;
2941 // kmp_int32 part_id;
2942 // kmp_cmplrdata_t data1;
2943 // kmp_cmplrdata_t data2;
2944 // For taskloops additional fields:
2945 // kmp_uint64 lb;
2946 // kmp_uint64 ub;
2947 // kmp_int64 st;
2948 // kmp_int32 liter;
2949 // void * reductions;
2950 // };
2951 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2952 UD->startDefinition();
2953 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2954 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2955 UD->completeDefinition();
2956 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
2957 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2958 RD->startDefinition();
2959 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2960 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2961 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2962 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2963 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2964 if (isOpenMPTaskLoopDirective(Kind)) {
2965 QualType KmpUInt64Ty =
2966 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2967 QualType KmpInt64Ty =
2968 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2969 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2970 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2971 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2972 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2973 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2974 }
2975 RD->completeDefinition();
2976 return RD;
2977}
2978
2979static RecordDecl *
2980createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2981 ArrayRef<PrivateDataTy> Privates) {
2982 ASTContext &C = CGM.getContext();
2983 // Build struct kmp_task_t_with_privates {
2984 // kmp_task_t task_data;
2985 // .kmp_privates_t. privates;
2986 // };
2987 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2988 RD->startDefinition();
2989 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2990 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2991 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
2992 RD->completeDefinition();
2993 return RD;
2994}
2995
2996/// Emit a proxy function which accepts kmp_task_t as the second
2997/// argument.
2998/// \code
2999/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3000/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3001/// For taskloops:
3002/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3003/// tt->reductions, tt->shareds);
3004/// return 0;
3005/// }
3006/// \endcode
3007static llvm::Function *
3009 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3010 QualType KmpTaskTWithPrivatesPtrQTy,
3011 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3012 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3013 llvm::Value *TaskPrivatesMap) {
3014 ASTContext &C = CGM.getContext();
3015 FunctionArgList Args;
3016 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3017 ImplicitParamKind::Other);
3018 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3019 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3020 ImplicitParamKind::Other);
3021 Args.push_back(&GtidArg);
3022 Args.push_back(&TaskTypeArg);
3023 const auto &TaskEntryFnInfo =
3024 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3025 llvm::FunctionType *TaskEntryTy =
3026 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3027 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3028 auto *TaskEntry = llvm::Function::Create(
3029 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3030 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3031 TaskEntry->setDoesNotRecurse();
3032 CodeGenFunction CGF(CGM);
3033 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3034 Loc, Loc);
3035
3036 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3037 // tt,
3038 // For taskloops:
3039 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3040 // tt->task_data.shareds);
3041 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3042 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3043 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3044 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3045 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3046 const auto *KmpTaskTWithPrivatesQTyRD =
3047 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3048 LValue Base =
3049 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3050 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3051 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3052 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3053 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3054
3055 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3056 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3057 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3058 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3059 CGF.ConvertTypeForMem(SharedsPtrTy));
3060
3061 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3062 llvm::Value *PrivatesParam;
3063 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3064 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3065 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3066 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3067 } else {
3068 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3069 }
3070
3071 llvm::Value *CommonArgs[] = {
3072 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3073 CGF.Builder
3074 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3075 CGF.VoidPtrTy, CGF.Int8Ty)
3076 .emitRawPointer(CGF)};
3077 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3078 std::end(CommonArgs));
3079 if (isOpenMPTaskLoopDirective(Kind)) {
3080 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3081 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3082 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3083 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3084 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3085 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3086 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3087 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3088 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3089 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3090 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3091 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3092 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3093 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3094 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3095 CallArgs.push_back(LBParam);
3096 CallArgs.push_back(UBParam);
3097 CallArgs.push_back(StParam);
3098 CallArgs.push_back(LIParam);
3099 CallArgs.push_back(RParam);
3100 }
3101 CallArgs.push_back(SharedsParam);
3102
3103 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3104 CallArgs);
3105 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3106 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3107 CGF.FinishFunction();
3108 return TaskEntry;
3109}
3110
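/// A sketch (not verbatim) of the helper emitted below; it walks the
/// privates record and runs a destructor for every field whose type needs
/// one:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt) {
///   ~FieldTy(tt->privates.field); // for each destructible field
/// }
/// \endcode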
3111static llvm::Function *emitDestructorsFunction(CodeGenModule &CGM,
3112 SourceLocation Loc,
3113 QualType KmpInt32Ty,
3114 QualType KmpTaskTWithPrivatesPtrQTy,
3115 QualType KmpTaskTWithPrivatesQTy) {
3116 ASTContext &C = CGM.getContext();
3117 FunctionArgList Args;
3118 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3119 ImplicitParamKind::Other);
3120 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3121 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3122 ImplicitParamKind::Other);
3123 Args.push_back(&GtidArg);
3124 Args.push_back(&TaskTypeArg);
3125 const auto &DestructorFnInfo =
3126 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3127 llvm::FunctionType *DestructorFnTy =
3128 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3129 std::string Name =
3130 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3131 auto *DestructorFn =
3132 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3133 Name, &CGM.getModule());
3134 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3135 DestructorFnInfo);
3136 DestructorFn->setDoesNotRecurse();
3137 CodeGenFunction CGF(CGM);
3138 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3139 Args, Loc, Loc);
3140
3141 LValue Base = CGF.EmitLoadOfPointerLValue(
3142 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3143 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3144 const auto *KmpTaskTWithPrivatesQTyRD =
3145 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3146 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3147 Base = CGF.EmitLValueForField(Base, *FI);
3148 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3149 if (QualType::DestructionKind DtorKind =
3150 Field->getType().isDestructedType()) {
3151 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3152 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3153 }
3154 }
3155 CGF.FinishFunction();
3156 return DestructorFn;
3157}
3158
3159/// Emit a privates mapping function for correct handling of private and
3160/// firstprivate variables.
3161/// \code
3162/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3163/// **noalias priv1,..., <tyn> **noalias privn) {
3164/// *priv1 = &.privates.priv1;
3165/// ...;
3166/// *privn = &.privates.privn;
3167/// }
3168/// \endcode
3169static llvm::Value *
3170emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3171 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3172 ArrayRef<PrivateDataTy> Privates) {
3173 ASTContext &C = CGM.getContext();
3174 FunctionArgList Args;
3175 ImplicitParamDecl TaskPrivatesArg(
3176 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3177 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3178 ImplicitParamKind::Other);
3179 Args.push_back(&TaskPrivatesArg);
3180 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3181 unsigned Counter = 1;
3182 for (const Expr *E : Data.PrivateVars) {
3183 Args.push_back(ImplicitParamDecl::Create(
3184 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3185 C.getPointerType(C.getPointerType(E->getType()))
3186 .withConst()
3187 .withRestrict(),
3188 ImplicitParamKind::Other));
3189 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3190 PrivateVarsPos[VD] = Counter;
3191 ++Counter;
3192 }
3193 for (const Expr *E : Data.FirstprivateVars) {
3194 Args.push_back(ImplicitParamDecl::Create(
3195 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3196 C.getPointerType(C.getPointerType(E->getType()))
3197 .withConst()
3198 .withRestrict(),
3199 ImplicitParamKind::Other));
3200 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3201 PrivateVarsPos[VD] = Counter;
3202 ++Counter;
3203 }
3204 for (const Expr *E : Data.LastprivateVars) {
3205 Args.push_back(ImplicitParamDecl::Create(
3206 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3207 C.getPointerType(C.getPointerType(E->getType()))
3208 .withConst()
3209 .withRestrict(),
3210 ImplicitParamKind::Other));
3211 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3212 PrivateVarsPos[VD] = Counter;
3213 ++Counter;
3214 }
3215 for (const VarDecl *VD : Data.PrivateLocals) {
3216 QualType Ty = VD->getType().getNonReferenceType();
3217 if (VD->getType()->isLValueReferenceType())
3218 Ty = C.getPointerType(Ty);
3219 if (isAllocatableDecl(VD))
3220 Ty = C.getPointerType(Ty);
3221 Args.push_back(ImplicitParamDecl::Create(
3222 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3223 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3224 ImplicitParamKind::Other));
3225 PrivateVarsPos[VD] = Counter;
3226 ++Counter;
3227 }
3228 const auto &TaskPrivatesMapFnInfo =
3229 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3230 llvm::FunctionType *TaskPrivatesMapTy =
3231 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3232 std::string Name =
3233 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3234 auto *TaskPrivatesMap = llvm::Function::Create(
3235 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3236 &CGM.getModule());
3237 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3238 TaskPrivatesMapFnInfo);
3239 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3240 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3241 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3242 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3243 }
3244 CodeGenFunction CGF(CGM);
3245 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3246 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3247
3248 // *privi = &.privates.privi;
3249 LValue Base = CGF.EmitLoadOfPointerLValue(
3250 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3251 TaskPrivatesArg.getType()->castAs<PointerType>());
3252 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3253 Counter = 0;
3254 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3255 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3256 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3257 LValue RefLVal =
3258 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3259 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3260 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3261 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3262 ++Counter;
3263 }
3264 CGF.FinishFunction();
3265 return TaskPrivatesMap;
3266}
3267
3268/// Emit initialization for private variables in task-based directives.
3269static void emitPrivatesInit(CodeGenFunction &CGF,
3270 const OMPExecutableDirective &D,
3271 Address KmpTaskSharedsPtr, LValue TDBase,
3272 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3273 QualType SharedsTy, QualType SharedsPtrTy,
3274 const OMPTaskDataTy &Data,
3275 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3276 ASTContext &C = CGF.getContext();
3277 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3278 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3279 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3280 ? OMPD_taskloop
3281 : OMPD_task;
3282 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3283 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3284 LValue SrcBase;
3285 bool IsTargetTask =
3286 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3287 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3288 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3289 // PointersArray, SizesArray, and MappersArray. The original variables for
3290 // these arrays are not captured and we get their addresses explicitly.
3291 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3292 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3293 SrcBase = CGF.MakeAddrLValue(
3294 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3295 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3296 CGF.ConvertTypeForMem(SharedsTy)),
3297 SharedsTy);
3298 }
3299 FI = FI->getType()->castAsRecordDecl()->field_begin();
3300 for (const PrivateDataTy &Pair : Privates) {
3301 // Do not initialize private locals.
3302 if (Pair.second.isLocalPrivate()) {
3303 ++FI;
3304 continue;
3305 }
3306 const VarDecl *VD = Pair.second.PrivateCopy;
3307 const Expr *Init = VD->getAnyInitializer();
3308 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3309 !CGF.isTrivialInitializer(Init)))) {
3310 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3311 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3312 const VarDecl *OriginalVD = Pair.second.Original;
3313 // Check if the variable is the target-based BasePointersArray,
3314 // PointersArray, SizesArray, or MappersArray.
3315 LValue SharedRefLValue;
3316 QualType Type = PrivateLValue.getType();
3317 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3318 if (IsTargetTask && !SharedField) {
3319 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3320 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3321 cast<CapturedDecl>(OriginalVD->getDeclContext())
3322 ->getNumParams() == 0 &&
3323 isa<TranslationUnitDecl>(
3324 cast<CapturedDecl>(OriginalVD->getDeclContext())
3325 ->getDeclContext()) &&
3326 "Expected artificial target data variable.");
3327 SharedRefLValue =
3328 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3329 } else if (ForDup) {
3330 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3331 SharedRefLValue = CGF.MakeAddrLValue(
3332 SharedRefLValue.getAddress().withAlignment(
3333 C.getDeclAlign(OriginalVD)),
3334 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3335 SharedRefLValue.getTBAAInfo());
3336 } else if (CGF.LambdaCaptureFields.count(
3337 Pair.second.Original->getCanonicalDecl()) > 0 ||
3338 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3339 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3340 } else {
3341 // Processing for implicitly captured variables.
3342 InlinedOpenMPRegionRAII Region(
3343 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3344 /*HasCancel=*/false, /*NoInheritance=*/true);
3345 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3346 }
3347 if (Type->isArrayType()) {
3348 // Initialize firstprivate array.
3349 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3350 // Perform simple memcpy.
3351 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3352 } else {
3353 // Initialize firstprivate array using element-by-element
3354 // initialization.
3355 CGF.EmitOMPAggregateAssign(
3356 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3357 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3358 Address SrcElement) {
3359 // Clean up any temporaries needed by the initialization.
3360 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3361 InitScope.addPrivate(Elem, SrcElement);
3362 (void)InitScope.Privatize();
3363 // Emit initialization for single element.
3364 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3365 CGF, &CapturesInfo);
3366 CGF.EmitAnyExprToMem(Init, DestElement,
3367 Init->getType().getQualifiers(),
3368 /*IsInitializer=*/false);
3369 });
3370 }
3371 } else {
3372 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3373 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3374 (void)InitScope.Privatize();
3375 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3376 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3377 /*capturedByInit=*/false);
3378 }
3379 } else {
3380 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3381 }
3382 }
3383 ++FI;
3384 }
3385}
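// Note: emitPrivatesInit runs in two contexts. With ForDup=false it is
// called once from emitTaskInit when the task is created; with ForDup=true
// it is called from the task duplication helper built by emitTaskDupFunction
// below, reading firstprivate sources out of the source task's shareds.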
3386
3387/// Check if duplication function is required for taskloops.
3388static bool checkInitIsRequired(CodeGenFunction &CGF,
3389 ArrayRef<PrivateDataTy> Privates) {
3390 bool InitRequired = false;
3391 for (const PrivateDataTy &Pair : Privates) {
3392 if (Pair.second.isLocalPrivate())
3393 continue;
3394 const VarDecl *VD = Pair.second.PrivateCopy;
3395 const Expr *Init = VD->getAnyInitializer();
3396 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3397 !CGF.isTrivialInitializer(Init));
3398 if (InitRequired)
3399 break;
3400 }
3401 return InitRequired;
3402}
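// Example: for "#pragma omp taskloop firstprivate(s)" where s has a
// non-trivial copy constructor, checkInitIsRequired() returns true, so a
// task_dup helper is emitted to re-run that constructor for every task the
// taskloop spawns.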
3403
3404
3405/// Emit task_dup function (for initialization of
3406/// private/firstprivate/lastprivate vars and last_iter flag)
3407/// \code
3408/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3409/// lastpriv) {
3410/// // setup lastprivate flag
3411/// task_dst->last = lastpriv;
3412/// // could be constructor calls here...
3413/// }
3414/// \endcode
3415static llvm::Value *
3416emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3417 const OMPExecutableDirective &D,
3418 QualType KmpTaskTWithPrivatesPtrQTy,
3419 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3420 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3421 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3422 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3423 ASTContext &C = CGM.getContext();
3424 FunctionArgList Args;
3425 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3426 KmpTaskTWithPrivatesPtrQTy,
3427 ImplicitParamKind::Other);
3428 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3429 KmpTaskTWithPrivatesPtrQTy,
3430 ImplicitParamKind::Other);
3431 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3432 ImplicitParamKind::Other);
3433 Args.push_back(&DstArg);
3434 Args.push_back(&SrcArg);
3435 Args.push_back(&LastprivArg);
3436 const auto &TaskDupFnInfo =
3437 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3438 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3439 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3440 auto *TaskDup = llvm::Function::Create(
3441 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3442 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3443 TaskDup->setDoesNotRecurse();
3444 CodeGenFunction CGF(CGM);
3445 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3446 Loc);
3447
3448 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3449 CGF.GetAddrOfLocalVar(&DstArg),
3450 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3451 // task_dst->liter = lastpriv;
3452 if (WithLastIter) {
3453 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3454 LValue Base = CGF.EmitLValueForField(
3455 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3456 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3457 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3458 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3459 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3460 }
3461
3462 // Emit initial values for private copies (if any).
3463 assert(!Privates.empty());
3464 Address KmpTaskSharedsPtr = Address::invalid();
3465 if (!Data.FirstprivateVars.empty()) {
3466 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3467 CGF.GetAddrOfLocalVar(&SrcArg),
3468 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3469 LValue Base = CGF.EmitLValueForField(
3470 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3471 KmpTaskSharedsPtr = Address(
3472 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3473 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3474 KmpTaskTShareds)),
3475 Loc),
3476 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3477 }
3478 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3479 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3480 CGF.FinishFunction();
3481 return TaskDup;
3482}
3483
3484/// Checks if destructor function is required to be generated.
3485/// \return true if cleanups are required, false otherwise.
3486static bool
3487checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3488 ArrayRef<PrivateDataTy> Privates) {
3489 for (const PrivateDataTy &P : Privates) {
3490 if (P.second.isLocalPrivate())
3491 continue;
3492 QualType Ty = P.second.Original->getType().getNonReferenceType();
3493 if (Ty.isDestructedType())
3494 return true;
3495 }
3496 return false;
3497}
3498
3499namespace {
3500/// Loop generator for OpenMP iterator expression.
3501class OMPIteratorGeneratorScope final
3502 : public CodeGenFunction::OMPPrivateScope {
3503 CodeGenFunction &CGF;
3504 const OMPIteratorExpr *E = nullptr;
3505 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3506 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3507 OMPIteratorGeneratorScope() = delete;
3508 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3509
3510public:
3511 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3512 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3513 if (!E)
3514 return;
3515 SmallVector<llvm::Value *, 4> Uppers;
3516 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3517 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3518 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3519 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3520 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3521 addPrivate(
3522 HelperData.CounterVD,
3523 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3524 }
3525 Privatize();
3526
3527 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3528 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3529 LValue CLVal =
3530 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3531 HelperData.CounterVD->getType());
3532 // Counter = 0;
3533 CGF.EmitStoreOfScalar(
3534 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3535 CLVal);
3536 CodeGenFunction::JumpDest &ContDest =
3537 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3538 CodeGenFunction::JumpDest &ExitDest =
3539 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3540 // N = <number-of-iterations>;
3541 llvm::Value *N = Uppers[I];
3542 // cont:
3543 // if (Counter < N) goto body; else goto exit;
3544 CGF.EmitBlock(ContDest.getBlock());
3545 auto *CVal =
3546 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3547 llvm::Value *Cmp =
3548 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3549 ? CGF.Builder.CreateICmpSLT(CVal, N)
3550 : CGF.Builder.CreateICmpULT(CVal, N);
3551 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3552 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3553 // body:
3554 CGF.EmitBlock(BodyBB);
3555 // Iteri = Begini + Counter * Stepi;
3556 CGF.EmitIgnoredExpr(HelperData.Update);
3557 }
3558 }
3559 ~OMPIteratorGeneratorScope() {
3560 if (!E)
3561 return;
3562 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3563 // Counter = Counter + 1;
3564 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3565 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3566 // goto cont;
3567 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3568 // exit:
3569 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3570 }
3571 }
3572};
3573} // namespace
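// Taken together, the constructor and destructor above bracket the emitted
// body with one counter loop per iterator; a pseudo-code sketch for a single
// "iterator(i = begin:end:step)" modifier:
//   counter = 0;
// cont:
//   if (!(counter < <number-of-iterations>)) goto exit;
//   i = begin + counter * step;   // HelperData.Update
//   <body emitted while the scope is alive>
//   counter = counter + 1;        // HelperData.CounterUpdate
//   goto cont;
// exit: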
3574
3575static std::pair<llvm::Value *, llvm::Value *>
3576getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3577 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3578 llvm::Value *Addr;
3579 if (OASE) {
3580 const Expr *Base = OASE->getBase();
3581 Addr = CGF.EmitScalarExpr(Base);
3582 } else {
3583 Addr = CGF.EmitLValue(E).getPointer(CGF);
3584 }
3585 llvm::Value *SizeVal;
3586 QualType Ty = E->getType();
3587 if (OASE) {
3588 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3589 for (const Expr *SE : OASE->getDimensions()) {
3590 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3591 Sz = CGF.EmitScalarConversion(
3592 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3593 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3594 }
3595 } else if (const auto *ASE =
3596 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3597 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3598 Address UpAddrAddress = UpAddrLVal.getAddress();
3599 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3600 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3601 /*Idx0=*/1);
3602 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3603 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3604 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3605 } else {
3606 SizeVal = CGF.getTypeSize(Ty);
3607 }
3608 return std::make_pair(Addr, SizeVal);
3609}
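// Illustrative examples of the three cases above, assuming "int *p" and
// "int a[10]":
//   array shaping:  ([2][8])p -> addr = p,     size = 2 * 8 * sizeof(int)
//   array section:  a[2:5]    -> addr = &a[2], size = (&a[6] + 1) - &a[2]
//                                             in bytes, i.e. 5 * sizeof(int)
//   plain lvalue:   a         -> addr = &a,    size = sizeof(a)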
3610
3611/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
3612static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3613 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3614 if (KmpTaskAffinityInfoTy.isNull()) {
3615 RecordDecl *KmpAffinityInfoRD =
3616 C.buildImplicitRecord("kmp_task_affinity_info_t");
3617 KmpAffinityInfoRD->startDefinition();
3618 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3619 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3620 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3621 KmpAffinityInfoRD->completeDefinition();
3622 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3623 }
3624}
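// A sketch of the implicit record built above; it mirrors the runtime's
// kmp_task_affinity_info_t, with the flags bit-field flattened to a 32-bit
// unsigned integer:
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t   len;
//     uint32_t flags;
//   };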
3625
3626CGOpenMPRuntime::TaskResultTy
3627CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3628 const OMPExecutableDirective &D,
3629 llvm::Function *TaskFunction, QualType SharedsTy,
3630 Address Shareds, const OMPTaskDataTy &Data) {
3631 ASTContext &C = CGM.getContext();
3632 SmallVector<PrivateDataTy, 4> Privates;
3633 // Aggregate privates and sort them by the alignment.
3634 const auto *I = Data.PrivateCopies.begin();
3635 for (const Expr *E : Data.PrivateVars) {
3636 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3637 Privates.emplace_back(
3638 C.getDeclAlign(VD),
3639 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3640 /*PrivateElemInit=*/nullptr));
3641 ++I;
3642 }
3643 I = Data.FirstprivateCopies.begin();
3644 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3645 for (const Expr *E : Data.FirstprivateVars) {
3646 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3647 Privates.emplace_back(
3648 C.getDeclAlign(VD),
3649 PrivateHelpersTy(
3650 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3651 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3652 ++I;
3653 ++IElemInitRef;
3654 }
3655 I = Data.LastprivateCopies.begin();
3656 for (const Expr *E : Data.LastprivateVars) {
3657 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3658 Privates.emplace_back(
3659 C.getDeclAlign(VD),
3660 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3661 /*PrivateElemInit=*/nullptr));
3662 ++I;
3663 }
3664 for (const VarDecl *VD : Data.PrivateLocals) {
3665 if (isAllocatableDecl(VD))
3666 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3667 else
3668 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3669 }
3670 llvm::stable_sort(Privates,
3671 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3672 return L.first > R.first;
3673 });
3674 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3675 // Build type kmp_routine_entry_t (if not built yet).
3676 emitKmpRoutineEntryT(KmpInt32Ty);
3677 // Build type kmp_task_t (if not built yet).
3678 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3679 if (SavedKmpTaskloopTQTy.isNull()) {
3680 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3681 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3682 }
3683 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3684 } else {
3685 assert((D.getDirectiveKind() == OMPD_task ||
3686 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3687 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3688 "Expected taskloop, task or target directive");
3689 if (SavedKmpTaskTQTy.isNull()) {
3690 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3691 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3692 }
3693 KmpTaskTQTy = SavedKmpTaskTQTy;
3694 }
3695 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3696 // Build particular struct kmp_task_t for the given task.
3697 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3698 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3699 CanQualType KmpTaskTWithPrivatesQTy =
3700 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3701 QualType KmpTaskTWithPrivatesPtrQTy =
3702 C.getPointerType(KmpTaskTWithPrivatesQTy);
3703 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3704 llvm::Value *KmpTaskTWithPrivatesTySize =
3705 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3706 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3707
3708 // Emit initial values for private copies (if any).
3709 llvm::Value *TaskPrivatesMap = nullptr;
3710 llvm::Type *TaskPrivatesMapTy =
3711 std::next(TaskFunction->arg_begin(), 3)->getType();
3712 if (!Privates.empty()) {
3713 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3714 TaskPrivatesMap =
3715 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3716 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3717 TaskPrivatesMap, TaskPrivatesMapTy);
3718 } else {
3719 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3720 cast<llvm::PointerType>(TaskPrivatesMapTy));
3721 }
3722 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3723 // kmp_task_t *tt);
3724 llvm::Function *TaskEntry = emitProxyTaskFunction(
3725 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3726 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3727 TaskPrivatesMap);
3728
3729 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3730 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3731 // kmp_routine_entry_t *task_entry);
3732 // Task flags. Format is taken from
3733 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3734 // description of kmp_tasking_flags struct.
3735 enum {
3736 TiedFlag = 0x1,
3737 FinalFlag = 0x2,
3738 DestructorsFlag = 0x8,
3739 PriorityFlag = 0x20,
3740 DetachableFlag = 0x40,
3741 FreeAgentFlag = 0x80,
3742 };
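// For example, a tied task carrying destructible privates and a priority
// clause ends up with (TiedFlag | DestructorsFlag | PriorityFlag) == 0x29;
// FinalFlag is OR'ed in below, through a select when final(cond) is not a
// compile-time constant.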
3743 unsigned Flags = Data.Tied ? TiedFlag : 0;
3744 bool NeedsCleanup = false;
3745 if (!Privates.empty()) {
3746 NeedsCleanup =
3747 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3748 if (NeedsCleanup)
3749 Flags = Flags | DestructorsFlag;
3750 }
3751 if (const auto *Clause = D.getSingleClause<OMPThreadsetClause>()) {
3752 OpenMPThreadsetKind Kind = Clause->getThreadsetKind();
3753 if (Kind == OMPC_THREADSET_omp_pool)
3754 Flags = Flags | FreeAgentFlag;
3755 }
3756 if (Data.Priority.getInt())
3757 Flags = Flags | PriorityFlag;
3758 if (D.hasClausesOfKind<OMPDetachClause>())
3759 Flags = Flags | DetachableFlag;
3760 llvm::Value *TaskFlags =
3761 Data.Final.getPointer()
3762 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3763 CGF.Builder.getInt32(FinalFlag),
3764 CGF.Builder.getInt32(/*C=*/0))
3765 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3766 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3767 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3768 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3769 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3770 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3771 TaskEntry, KmpRoutineEntryPtrTy)};
3772 llvm::Value *NewTask;
3773 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3774 // Check if we have any device clause associated with the directive.
3775 const Expr *Device = nullptr;
3776 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3777 Device = C->getDevice();
3778 // Emit the device ID if present; otherwise use the default value.
3779 llvm::Value *DeviceID;
3780 if (Device)
3781 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3782 CGF.Int64Ty, /*isSigned=*/true);
3783 else
3784 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3785 AllocArgs.push_back(DeviceID);
3786 NewTask = CGF.EmitRuntimeCall(
3787 OMPBuilder.getOrCreateRuntimeFunction(
3788 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3789 AllocArgs);
3790 } else {
3791 NewTask =
3792 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3793 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3794 AllocArgs);
3795 }
3796 // Emit detach clause initialization.
3797 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3798 // task_descriptor);
3799 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3800 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3801 LValue EvtLVal = CGF.EmitLValue(Evt);
3802
3803 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3804 // int gtid, kmp_task_t *task);
3805 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3806 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3807 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3808 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3809 OMPBuilder.getOrCreateRuntimeFunction(
3810 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3811 {Loc, Tid, NewTask});
3812 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3813 Evt->getExprLoc());
3814 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3815 }
3816 // Process affinity clauses.
3817 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3818 // Process list of affinity data.
3819 ASTContext &C = CGM.getContext();
3820 Address AffinitiesArray = Address::invalid();
3821 // Calculate number of elements to form the array of affinity data.
3822 llvm::Value *NumOfElements = nullptr;
3823 unsigned NumAffinities = 0;
3824 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3825 if (const Expr *Modifier = C->getModifier()) {
3826 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3827 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3828 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3829 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3830 NumOfElements =
3831 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3832 }
3833 } else {
3834 NumAffinities += C->varlist_size();
3835 }
3836 }
3837 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3838 // Field ids in the kmp_task_affinity_info record.
3839 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3840
3841 QualType KmpTaskAffinityInfoArrayTy;
3842 if (NumOfElements) {
3843 NumOfElements = CGF.Builder.CreateNUWAdd(
3844 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3845 auto *OVE = new (C) OpaqueValueExpr(
3846 Loc,
3847 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3848 VK_PRValue);
3849 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3850 RValue::get(NumOfElements));
3851 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3852 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3853 /*IndexTypeQuals=*/0);
3854 // Properly emit variable-sized array.
3855 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3856 ImplicitParamKind::Other);
3857 CGF.EmitVarDecl(*PD);
3858 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3859 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3860 /*isSigned=*/false);
3861 } else {
3862 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3863 KmpTaskAffinityInfoTy,
3864 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3865 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3866 AffinitiesArray =
3867 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3868 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3869 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3870 /*isSigned=*/false);
3871 }
3872
3873 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3874 // Fill array by elements without iterators.
3875 unsigned Pos = 0;
3876 bool HasIterator = false;
3877 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3878 if (C->getModifier()) {
3879 HasIterator = true;
3880 continue;
3881 }
3882 for (const Expr *E : C->varlist()) {
3883 llvm::Value *Addr;
3884 llvm::Value *Size;
3885 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3886 LValue Base =
3887 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3888 KmpTaskAffinityInfoTy);
3889 // affs[i].base_addr = &<Affinities[i].second>;
3890 LValue BaseAddrLVal = CGF.EmitLValueForField(
3891 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3892 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3893 BaseAddrLVal);
3894 // affs[i].len = sizeof(<Affinities[i].second>);
3895 LValue LenLVal = CGF.EmitLValueForField(
3896 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3897 CGF.EmitStoreOfScalar(Size, LenLVal);
3898 ++Pos;
3899 }
3900 }
3901 LValue PosLVal;
3902 if (HasIterator) {
3903 PosLVal = CGF.MakeAddrLValue(
3904 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3905 C.getSizeType());
3906 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3907 }
3908 // Process elements with iterators.
3909 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3910 const Expr *Modifier = C->getModifier();
3911 if (!Modifier)
3912 continue;
3913 OMPIteratorGeneratorScope IteratorScope(
3914 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3915 for (const Expr *E : C->varlist()) {
3916 llvm::Value *Addr;
3917 llvm::Value *Size;
3918 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3919 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3920 LValue Base =
3921 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3922 KmpTaskAffinityInfoTy);
3923 // affs[i].base_addr = &<Affinities[i].second>;
3924 LValue BaseAddrLVal = CGF.EmitLValueForField(
3925 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3926 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3927 BaseAddrLVal);
3928 // affs[i].len = sizeof(<Affinities[i].second>);
3929 LValue LenLVal = CGF.EmitLValueForField(
3930 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3931 CGF.EmitStoreOfScalar(Size, LenLVal);
3932 Idx = CGF.Builder.CreateNUWAdd(
3933 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3934 CGF.EmitStoreOfScalar(Idx, PosLVal);
3935 }
3936 }
3937 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3938 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3939 // naffins, kmp_task_affinity_info_t *affin_list);
3940 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3941 llvm::Value *GTid = getThreadID(CGF, Loc);
3942 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3943 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3944 // FIXME: Emit the function and ignore its result for now unless the
3945 // runtime function is properly implemented.
3946 (void)CGF.EmitRuntimeCall(
3947 OMPBuilder.getOrCreateRuntimeFunction(
3948 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3949 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3950 }
3951 llvm::Value *NewTaskNewTaskTTy =
3952 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3953 NewTask, KmpTaskTWithPrivatesPtrTy);
3954 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3955 KmpTaskTWithPrivatesQTy);
3956 LValue TDBase =
3957 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3958 // Fill the data in the resulting kmp_task_t record.
3959 // Copy shareds if there are any.
3960 Address KmpTaskSharedsPtr = Address::invalid();
3961 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
3962 KmpTaskSharedsPtr = Address(
3963 CGF.EmitLoadOfScalar(
3964 CGF.EmitLValueForField(
3965 TDBase,
3966 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3967 Loc),
3968 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3969 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3970 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3971 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3972 }
3973 // Emit initial values for private copies (if any).
3974 TaskResultTy Result;
3975 if (!Privates.empty()) {
3976 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3977 SharedsTy, SharedsPtrTy, Data, Privates,
3978 /*ForDup=*/false);
3979 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3980 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3981 Result.TaskDupFn = emitTaskDupFunction(
3982 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3983 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3984 /*WithLastIter=*/!Data.LastprivateVars.empty());
3985 }
3986 }
3987 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3988 enum { Priority = 0, Destructors = 1 };
3989 // Provide pointer to function with destructors for privates.
3990 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3991 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
3992 assert(KmpCmplrdataUD->isUnion());
3993 if (NeedsCleanup) {
3994 llvm::Value *DestructorFn = emitDestructorsFunction(
3995 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3996 KmpTaskTWithPrivatesQTy);
3997 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3998 LValue DestructorsLV = CGF.EmitLValueForField(
3999 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4000 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4001 DestructorFn, KmpRoutineEntryPtrTy),
4002 DestructorsLV);
4003 }
4004 // Set priority.
4005 if (Data.Priority.getInt()) {
4006 LValue Data2LV = CGF.EmitLValueForField(
4007 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4008 LValue PriorityLV = CGF.EmitLValueForField(
4009 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4010 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4011 }
4012 Result.NewTask = NewTask;
4013 Result.TaskEntry = TaskEntry;
4014 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4015 Result.TDBase = TDBase;
4016 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4017 return Result;
4018}
4019
4020/// Translates internal dependency kind into the runtime kind.
4021static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4022 RTLDependenceKindTy DepKind;
4023 switch (K) {
4024 case OMPC_DEPEND_in:
4025 DepKind = RTLDependenceKindTy::DepIn;
4026 break;
4027 // Out and InOut dependencies must use the same code.
4028 case OMPC_DEPEND_out:
4029 case OMPC_DEPEND_inout:
4030 DepKind = RTLDependenceKindTy::DepInOut;
4031 break;
4032 case OMPC_DEPEND_mutexinoutset:
4033 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4034 break;
4035 case OMPC_DEPEND_inoutset:
4036 DepKind = RTLDependenceKindTy::DepInOutSet;
4037 break;
4038 case OMPC_DEPEND_outallmemory:
4039 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4040 break;
4041 case OMPC_DEPEND_source:
4042 case OMPC_DEPEND_sink:
4043 case OMPC_DEPEND_depobj:
4044 case OMPC_DEPEND_inoutallmemory:
4045 case OMPC_DEPEND_unknown:
4046 llvm_unreachable("Unknown task dependence type");
4047 }
4048 return DepKind;
4049}
4050
4051/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4052static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4053 QualType &FlagsTy) {
4054 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4055 if (KmpDependInfoTy.isNull()) {
4056 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4057 KmpDependInfoRD->startDefinition();
4058 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4059 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4060 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4061 KmpDependInfoRD->completeDefinition();
4062 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4063 }
4064}
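// A sketch of the implicit record built above; it mirrors the runtime's
// kmp_depend_info, with the flags bit-field flattened to a bool-width
// unsigned integer (one byte on typical targets):
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t   len;
//     uint8_t  flags;
//   };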
4065
4066std::pair<llvm::Value *, LValue>
4067CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4068 SourceLocation Loc) {
4069 ASTContext &C = CGM.getContext();
4070 QualType FlagsTy;
4071 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4072 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4073 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4074 LValue Base = CGF.EmitLoadOfPointerLValue(
4075 DepobjLVal.getAddress().withElementType(
4076 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4077 KmpDependInfoPtrTy->castAs<PointerType>());
4078 Address DepObjAddr = CGF.Builder.CreateGEP(
4079 CGF, Base.getAddress(),
4080 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4081 LValue NumDepsBase = CGF.MakeAddrLValue(
4082 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4083 // NumDeps = deps[i].base_addr;
4084 LValue BaseAddrLVal = CGF.EmitLValueForField(
4085 NumDepsBase,
4086 *std::next(KmpDependInfoRD->field_begin(),
4087 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4088 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4089 return std::make_pair(NumDeps, Base);
4090}
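// Note the GEP by -1 above: the element count of a depobj list is stashed in
// the base_addr field of the kmp_depend_info entry immediately preceding the
// list, which emitDepobjDependClause below writes when the list is created.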
4091
4092static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4093 llvm::PointerUnion<unsigned *, LValue *> Pos,
4094 const OMPTaskDataTy::DependData &Data,
4095 Address DependenciesArray) {
4096 CodeGenModule &CGM = CGF.CGM;
4097 ASTContext &C = CGM.getContext();
4098 QualType FlagsTy;
4099 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4100 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4101 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4102
4103 OMPIteratorGeneratorScope IteratorScope(
4104 CGF, cast_or_null<OMPIteratorExpr>(
4105 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4106 : nullptr));
4107 for (const Expr *E : Data.DepExprs) {
4108 llvm::Value *Addr;
4109 llvm::Value *Size;
4110
4111 // The expression will be a nullptr in the 'omp_all_memory' case.
4112 if (E) {
4113 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4114 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4115 } else {
4116 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4117 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4118 }
4119 LValue Base;
4120 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4121 Base = CGF.MakeAddrLValue(
4122 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4123 } else {
4124 assert(E && "Expected a non-null expression");
4125 LValue &PosLVal = *cast<LValue *>(Pos);
4126 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4127 Base = CGF.MakeAddrLValue(
4128 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4129 }
4130 // deps[i].base_addr = &<Dependencies[i].second>;
4131 LValue BaseAddrLVal = CGF.EmitLValueForField(
4132 Base,
4133 *std::next(KmpDependInfoRD->field_begin(),
4134 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4135 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4136 // deps[i].len = sizeof(<Dependencies[i].second>);
4137 LValue LenLVal = CGF.EmitLValueForField(
4138 Base, *std::next(KmpDependInfoRD->field_begin(),
4139 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4140 CGF.EmitStoreOfScalar(Size, LenLVal);
4141 // deps[i].flags = <Dependencies[i].first>;
4142 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4143 LValue FlagsLVal = CGF.EmitLValueForField(
4144 Base,
4145 *std::next(KmpDependInfoRD->field_begin(),
4146 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4147 CGF.EmitStoreOfScalar(
4148 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4149 FlagsLVal);
4150 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4151 ++(*P);
4152 } else {
4153 LValue &PosLVal = *cast<LValue *>(Pos);
4154 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4155 Idx = CGF.Builder.CreateNUWAdd(Idx,
4156 llvm::ConstantInt::get(Idx->getType(), 1));
4157 CGF.EmitStoreOfScalar(Idx, PosLVal);
4158 }
4159 }
4160}
4161
4162static SmallVector<llvm::Value *, 4>
4163emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4164 const OMPTaskDataTy::DependData &Data) {
4165 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4166 "Expected depobj dependency kind.");
4167 SmallVector<llvm::Value *, 4> Sizes;
4168 SmallVector<LValue, 4> SizeLVals;
4169 ASTContext &C = CGF.getContext();
4170 {
4171 OMPIteratorGeneratorScope IteratorScope(
4172 CGF, cast_or_null<OMPIteratorExpr>(
4173 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4174 : nullptr));
4175 for (const Expr *E : Data.DepExprs) {
4176 llvm::Value *NumDeps;
4177 LValue Base;
4178 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4179 std::tie(NumDeps, Base) =
4180 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4181 LValue NumLVal = CGF.MakeAddrLValue(
4182 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4183 C.getUIntPtrType());
4184 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4185 NumLVal.getAddress());
4186 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4187 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4188 CGF.EmitStoreOfScalar(Add, NumLVal);
4189 SizeLVals.push_back(NumLVal);
4190 }
4191 }
4192 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4193 llvm::Value *Size =
4194 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4195 Sizes.push_back(Size);
4196 }
4197 return Sizes;
4198}
4199
4200static void emitDepobjElements(CodeGenFunction &CGF,
4201 QualType &KmpDependInfoTy,
4202 LValue PosLVal,
4203 const OMPTaskDataTy::DependData &Data,
4204 Address DependenciesArray) {
4205 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4206 "Expected depobj dependency kind.");
4207 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4208 {
4209 OMPIteratorGeneratorScope IteratorScope(
4210 CGF, cast_or_null<OMPIteratorExpr>(
4211 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4212 : nullptr));
4213 for (const Expr *E : Data.DepExprs) {
4214 llvm::Value *NumDeps;
4215 LValue Base;
4216 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4217 std::tie(NumDeps, Base) =
4218 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4219
4220 // memcopy dependency data.
4221 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4222 ElSize,
4223 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4224 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4225 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4226 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4227
4228 // Increase pos.
4229 // pos += size;
4230 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4231 CGF.EmitStoreOfScalar(Add, PosLVal);
4232 }
4233 }
4234}
4235
4236std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4237 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4238 SourceLocation Loc) {
4239 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4240 return D.DepExprs.empty();
4241 }))
4242 return std::make_pair(nullptr, Address::invalid());
4243 // Process list of dependencies.
4244 ASTContext &C = CGM.getContext();
4245 Address DependenciesArray = Address::invalid();
4246 llvm::Value *NumOfElements = nullptr;
4247 unsigned NumDependencies = std::accumulate(
4248 Dependencies.begin(), Dependencies.end(), 0,
4249 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4250 return D.DepKind == OMPC_DEPEND_depobj
4251 ? V
4252 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4253 });
4254 QualType FlagsTy;
4255 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4256 bool HasDepobjDeps = false;
4257 bool HasRegularWithIterators = false;
4258 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4259 llvm::Value *NumOfRegularWithIterators =
4260 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4261 // Calculate number of depobj dependencies and regular deps with the
4262 // iterators.
4263 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4264 if (D.DepKind == OMPC_DEPEND_depobj) {
4265 SmallVector<llvm::Value *, 4> Sizes =
4266 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4267 for (llvm::Value *Size : Sizes) {
4268 NumOfDepobjElements =
4269 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4270 }
4271 HasDepobjDeps = true;
4272 continue;
4273 }
4274 // Include number of iterations, if any.
4275
4276 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4277 llvm::Value *ClauseIteratorSpace =
4278 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4279 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4280 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4281 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4282 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4283 }
4284 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4285 ClauseIteratorSpace,
4286 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4287 NumOfRegularWithIterators =
4288 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4289 HasRegularWithIterators = true;
4290 continue;
4291 }
4292 }
4293
4294 QualType KmpDependInfoArrayTy;
4295 if (HasDepobjDeps || HasRegularWithIterators) {
4296 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4297 /*isSigned=*/false);
4298 if (HasDepobjDeps) {
4299 NumOfElements =
4300 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4301 }
4302 if (HasRegularWithIterators) {
4303 NumOfElements =
4304 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4305 }
4306 auto *OVE = new (C) OpaqueValueExpr(
4307 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4308 VK_PRValue);
4309 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4310 RValue::get(NumOfElements));
4311 KmpDependInfoArrayTy =
4312 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4313 /*IndexTypeQuals=*/0);
4314 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4315 // Properly emit variable-sized array.
4316 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4317 ImplicitParamKind::Other);
4318 CGF.EmitVarDecl(*PD);
4319 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4320 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4321 /*isSigned=*/false);
4322 } else {
4323 KmpDependInfoArrayTy = C.getConstantArrayType(
4324 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4325 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4326 DependenciesArray =
4327 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4328 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4329 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4330 /*isSigned=*/false);
4331 }
4332 unsigned Pos = 0;
4333 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4334 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4335 continue;
4336 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4337 }
4338 // Copy regular dependencies with iterators.
4339 LValue PosLVal = CGF.MakeAddrLValue(
4340 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4341 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4342 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4343 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4344 continue;
4345 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4346 }
4347 // Copy final depobj arrays without iterators.
4348 if (HasDepobjDeps) {
4349 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4350 if (Dep.DepKind != OMPC_DEPEND_depobj)
4351 continue;
4352 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4353 }
4354 }
4355 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4356 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4357 return std::make_pair(NumOfElements, DependenciesArray);
4358}
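// The dependency array produced above is filled in three passes, so its
// layout is:
//   [ regular deps without iterators | regular deps with iterators |
//     entries copied out of depobj lists ]
// with PosLVal carrying the running write position across the last two.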
4359
4360Address CGOpenMPRuntime::emitDepobjDependClause(
4361 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4362 SourceLocation Loc) {
4363 if (Dependencies.DepExprs.empty())
4364 return Address::invalid();
4365 // Process list of dependencies.
4366 ASTContext &C = CGM.getContext();
4367 Address DependenciesArray = Address::invalid();
4368 unsigned NumDependencies = Dependencies.DepExprs.size();
4369 QualType FlagsTy;
4370 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4371 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4372
4373 llvm::Value *Size;
4374 // Define type kmp_depend_info[<Dependencies.size()>];
4375 // For depobj reserve one extra element to store the number of elements.
4376 // It is required to handle depobj(x) update(in) construct.
4377 // kmp_depend_info[<Dependencies.size()>] deps;
4378 llvm::Value *NumDepsVal;
4379 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4380 if (const auto *IE =
4381 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4382 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4383 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4384 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4385 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4386 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4387 }
4388 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4389 NumDepsVal);
4390 CharUnits SizeInBytes =
4391 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4392 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4393 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4394 NumDepsVal =
4395 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4396 } else {
4397 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4398 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4399 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4400 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4401 Size = CGM.getSize(Sz.alignTo(Align));
4402 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4403 }
4404 // Need to allocate in dynamic memory.
4405 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4406 // Use default allocator.
4407 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4408 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4409
4410 llvm::Value *Addr =
4411 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4412 CGM.getModule(), OMPRTL___kmpc_alloc),
4413 Args, ".dep.arr.addr");
4414 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4415 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4416 Addr, CGF.Builder.getPtrTy(0));
4417 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4418 // Write number of elements in the first element of array for depobj.
4419 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4420 // deps[i].base_addr = NumDependencies;
4421 LValue BaseAddrLVal = CGF.EmitLValueForField(
4422 Base,
4423 *std::next(KmpDependInfoRD->field_begin(),
4424 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4425 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4426 llvm::PointerUnion<unsigned *, LValue *> Pos;
4427 unsigned Idx = 1;
4428 LValue PosLVal;
4429 if (Dependencies.IteratorExpr) {
4430 PosLVal = CGF.MakeAddrLValue(
4431 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4432 C.getSizeType());
4433 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4434 /*IsInit=*/true);
4435 Pos = &PosLVal;
4436 } else {
4437 Pos = &Idx;
4438 }
4439 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4440 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4441 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4442 CGF.Int8Ty);
4443 return DependenciesArray;
4444}
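// Note: the address returned above points at element 1 of the allocation;
// element 0 holds the dependency count written through BaseAddrLVal, which
// getDepobjElements reads back later via its GEP by -1.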
4445
4446void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4447 SourceLocation Loc) {
4448 ASTContext &C = CGM.getContext();
4449 QualType FlagsTy;
4450 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4451 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4452 C.VoidPtrTy.castAs<PointerType>());
4453 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4454 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4455 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4456 CGF.ConvertTypeForMem(KmpDependInfoTy));
4457 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4458 Addr.getElementType(), Addr.emitRawPointer(CGF),
4459 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4460 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4461 CGF.VoidPtrTy);
4462 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4463 // Use default allocator.
4464 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4465 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4466
4467 // __kmpc_free(gtid, addr, nullptr);
4468 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4469 CGM.getModule(), OMPRTL___kmpc_free),
4470 Args);
4471}
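// Illustrative sketch: for
//   #pragma omp depobj(o) destroy
// the code above rewinds the stored pointer by one kmp_depend_info element to
// reach the count record allocated by the clause above, then releases it:
//   __kmpc_free(gtid, (kmp_depend_info *)o - 1, /*allocator=*/nullptr);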
4472
4473 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4474 OpenMPDependClauseKind NewDepKind,
4475 SourceLocation Loc) {
4476 ASTContext &C = CGM.getContext();
4477 QualType FlagsTy;
4478 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4479 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4480 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4481 llvm::Value *NumDeps;
4482 LValue Base;
4483 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4484
4485 Address Begin = Base.getAddress();
4486 // Cast from pointer to array type to pointer to single element.
4487 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4488 Begin.emitRawPointer(CGF), NumDeps);
4489 // The basic structure here is a while-do loop.
4490 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4491 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4492 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4493 CGF.EmitBlock(BodyBB);
4494 llvm::PHINode *ElementPHI =
4495 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4496 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4497 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4498 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4499 Base.getTBAAInfo());
4500 // deps[i].flags = NewDepKind;
4501 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4502 LValue FlagsLVal = CGF.EmitLValueForField(
4503 Base, *std::next(KmpDependInfoRD->field_begin(),
4504 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4505 CGF.EmitStoreOfScalar(
4506 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4507 FlagsLVal);
4508
4509 // Shift the address forward by one element.
4510 llvm::Value *ElementNext =
4511 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4512 .emitRawPointer(CGF);
4513 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4514 llvm::Value *IsEmpty =
4515 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4516 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4517 // Done.
4518 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4519}
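// Illustrative sketch: for
//   #pragma omp depobj(o) update(out)
// the loop above is equivalent to
//   for (kmp_depend_info *d = begin; d != end; ++d)
//     d->flags = <flag for 'out'>;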
4520
4521 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4522 const OMPExecutableDirective &D,
4523 llvm::Function *TaskFunction,
4524 QualType SharedsTy, Address Shareds,
4525 const Expr *IfCond,
4526 const OMPTaskDataTy &Data) {
4527 if (!CGF.HaveInsertPoint())
4528 return;
4529
4530 TaskResultTy Result =
4531 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4532 llvm::Value *NewTask = Result.NewTask;
4533 llvm::Function *TaskEntry = Result.TaskEntry;
4534 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4535 LValue TDBase = Result.TDBase;
4536 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4537 // Process list of dependences.
4538 Address DependenciesArray = Address::invalid();
4539 llvm::Value *NumOfElements;
4540 std::tie(NumOfElements, DependenciesArray) =
4541 emitDependClause(CGF, Data.Dependences, Loc);
4542
4543 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4544 // libcall.
4545 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4546 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4547 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4548 // list is not empty
4549 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4550 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4551 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4552 llvm::Value *DepTaskArgs[7];
4553 if (!Data.Dependences.empty()) {
4554 DepTaskArgs[0] = UpLoc;
4555 DepTaskArgs[1] = ThreadID;
4556 DepTaskArgs[2] = NewTask;
4557 DepTaskArgs[3] = NumOfElements;
4558 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4559 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4560 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4561 }
4562 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4563 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4564 if (!Data.Tied) {
4565 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4566 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4567 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4568 }
4569 if (!Data.Dependences.empty()) {
4570 CGF.EmitRuntimeCall(
4571 OMPBuilder.getOrCreateRuntimeFunction(
4572 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4573 DepTaskArgs);
4574 } else {
4575 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4576 CGM.getModule(), OMPRTL___kmpc_omp_task),
4577 TaskArgs);
4578 }
4579 // Check if the parent region is untied and build the return for the untied task.
4580 if (auto *Region =
4581 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4582 Region->emitUntiedSwitch(CGF);
4583 };
4584
4585 llvm::Value *DepWaitTaskArgs[7];
4586 if (!Data.Dependences.empty()) {
4587 DepWaitTaskArgs[0] = UpLoc;
4588 DepWaitTaskArgs[1] = ThreadID;
4589 DepWaitTaskArgs[2] = NumOfElements;
4590 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4591 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4592 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4593 DepWaitTaskArgs[6] =
4594 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4595 }
4596 auto &M = CGM.getModule();
4597 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4598 TaskEntry, &Data, &DepWaitTaskArgs,
4599 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4600 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4601 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4602 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4603 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4604 // is specified.
4605 if (!Data.Dependences.empty())
4606 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4607 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4608 DepWaitTaskArgs);
4609 // Call proxy_task_entry(gtid, new_task);
4610 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4611 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4612 Action.Enter(CGF);
4613 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4614 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4615 OutlinedFnArgs);
4616 };
4617
4618 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4619 // kmp_task_t *new_task);
4620 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4621 // kmp_task_t *new_task);
4622 RegionCodeGenTy RCG(CodeGen);
4623 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4624 M, OMPRTL___kmpc_omp_task_begin_if0),
4625 TaskArgs,
4626 OMPBuilder.getOrCreateRuntimeFunction(
4627 M, OMPRTL___kmpc_omp_task_complete_if0),
4628 TaskArgs);
4629 RCG.setAction(Action);
4630 RCG(CGF);
4631 };
4632
4633 if (IfCond) {
4634 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4635 } else {
4636 RegionCodeGenTy ThenRCG(ThenCodeGen);
4637 ThenRCG(CGF);
4638 }
4639}
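// Illustrative sketch of the control flow emitted above for
//   #pragma omp task depend(in : x) if(cond)
// (names are approximate):
//   if (cond) {
//     __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps, deps, 0, nullptr);
//   } else {
//     __kmpc_omp_taskwait_deps_51(loc, gtid, ndeps, deps, 0, nullptr, has_nowait);
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task);   // undeferred, runs immediately
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }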
4640
4641 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4642 const OMPLoopDirective &D,
4643 llvm::Function *TaskFunction,
4644 QualType SharedsTy, Address Shareds,
4645 const Expr *IfCond,
4646 const OMPTaskDataTy &Data) {
4647 if (!CGF.HaveInsertPoint())
4648 return;
4649 TaskResultTy Result =
4650 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4651 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4652 // libcall.
4653 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4654 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4655 // sched, kmp_uint64 grainsize, void *task_dup);
4656 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4657 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4658 llvm::Value *IfVal;
4659 if (IfCond) {
4660 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4661 /*isSigned=*/true);
4662 } else {
4663 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4664 }
4665
4666 LValue LBLVal = CGF.EmitLValueForField(
4667 Result.TDBase,
4668 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4669 const auto *LBVar =
4670 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4671 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4672 /*IsInitializer=*/true);
4673 LValue UBLVal = CGF.EmitLValueForField(
4674 Result.TDBase,
4675 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4676 const auto *UBVar =
4677 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4678 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4679 /*IsInitializer=*/true);
4680 LValue StLVal = CGF.EmitLValueForField(
4681 Result.TDBase,
4682 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4683 const auto *StVar =
4684 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4685 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4686 /*IsInitializer=*/true);
4687 // Store reductions address.
4688 LValue RedLVal = CGF.EmitLValueForField(
4689 Result.TDBase,
4690 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4691 if (Data.Reductions) {
4692 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4693 } else {
4694 CGF.EmitNullInitialization(RedLVal.getAddress(),
4695 CGF.getContext().VoidPtrTy);
4696 }
4697 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4698 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4699 UpLoc,
4700 ThreadID,
4701 Result.NewTask,
4702 IfVal,
4703 LBLVal.getPointer(CGF),
4704 UBLVal.getPointer(CGF),
4705 CGF.EmitLoadOfScalar(StLVal, Loc),
4706 llvm::ConstantInt::getSigned(
4707 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4708 llvm::ConstantInt::getSigned(
4709 CGF.IntTy, Data.Schedule.getPointer()
4710 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4711 : NoSchedule),
4712 Data.Schedule.getPointer()
4713 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4714 /*isSigned=*/false)
4715 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4716 if (Data.HasModifier)
4717 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4718
4719 TaskArgs.push_back(Result.TaskDupFn
4720 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4721 Result.TaskDupFn, CGF.VoidPtrTy)
4722 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4723 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4724 CGM.getModule(), Data.HasModifier
4725 ? OMPRTL___kmpc_taskloop_5
4726 : OMPRTL___kmpc_taskloop),
4727 TaskArgs);
4728}
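// Illustrative sketch: for
//   #pragma omp taskloop grainsize(4)
// the call above becomes, roughly,
//   __kmpc_taskloop(loc, gtid, new_task, /*if_val=*/1, &lb, &ub, st,
//                   /*nogroup=*/1, /*sched=*/Grainsize, /*grainsize=*/4,
//                   /*task_dup=*/nullptr);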
4729
4730/// Emit reduction operation for each element of array (required for
4731/// array sections) LHS op = RHS.
4732/// \param Type Type of array.
4733/// \param LHSVar Variable on the left side of the reduction operation
4734/// (references element of array in original variable).
4735/// \param RHSVar Variable on the right side of the reduction operation
4736/// (references element of array in original variable).
4737/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4738/// RHSVar.
4739 static void EmitOMPAggregateReduction(
4740 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4741 const VarDecl *RHSVar,
4742 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4743 const Expr *, const Expr *)> &RedOpGen,
4744 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4745 const Expr *UpExpr = nullptr) {
4746 // Perform element-by-element initialization.
4747 QualType ElementTy;
4748 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4749 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4750
4751 // Drill down to the base element type on both arrays.
4752 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4753 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4754
4755 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4756 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4757 // Cast from pointer to array type to pointer to single element.
4758 llvm::Value *LHSEnd =
4759 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4760 // The basic structure here is a while-do loop.
4761 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4762 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4763 llvm::Value *IsEmpty =
4764 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4765 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4766
4767 // Enter the loop body, making that address the current address.
4768 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4769 CGF.EmitBlock(BodyBB);
4770
4771 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4772
4773 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4774 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4775 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4776 Address RHSElementCurrent(
4777 RHSElementPHI, RHSAddr.getElementType(),
4778 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4779
4780 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4781 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4782 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4783 Address LHSElementCurrent(
4784 LHSElementPHI, LHSAddr.getElementType(),
4785 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4786
4787 // Emit copy.
4788 CodeGenFunction::OMPPrivateScope Scope(CGF);
4789 Scope.addPrivate(LHSVar, LHSElementCurrent);
4790 Scope.addPrivate(RHSVar, RHSElementCurrent);
4791 Scope.Privatize();
4792 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4793 Scope.ForceCleanup();
4794
4795 // Shift the address forward by one element.
4796 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4797 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4798 "omp.arraycpy.dest.element");
4799 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4800 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4801 "omp.arraycpy.src.element");
4802 // Check whether we've reached the end.
4803 llvm::Value *Done =
4804 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4805 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4806 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4807 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4808
4809 // Done.
4810 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4811}
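// Illustrative sketch: for reduction(+ : a[0:n]) the loop built above behaves
// like
//   for (size_t i = 0; i != n; ++i)
//     lhs[i] = lhs[i] + rhs[i];   // RedOpGen with LHSVar/RHSVar remapped
// except that both element pointers advance through PHI nodes instead of an
// index.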
4812
4813/// Emit reduction combiner. If the combiner is a simple expression emit it as
4814/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4815/// UDR combiner function.
4816 static void emitReductionCombiner(CodeGenFunction &CGF,
4817 const Expr *ReductionOp) {
4818 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4819 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4820 if (const auto *DRE =
4821 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4822 if (const auto *DRD =
4823 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4824 std::pair<llvm::Function *, llvm::Function *> Reduction =
4825 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4826 RValue Func = RValue::get(Reduction.first);
4827 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4828 CGF.EmitIgnoredExpr(ReductionOp);
4829 return;
4830 }
4831 CGF.EmitIgnoredExpr(ReductionOp);
4832}
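// Illustrative sketch: for a user-defined reduction such as
//   #pragma omp declare reduction(merge : T : omp_out.append(omp_in))
// ReductionOp is a call whose callee is an OpaqueValueExpr; the mapping above
// rebinds it to the emitted combiner, so EmitIgnoredExpr lowers to roughly
//   combiner(&omp_out, &omp_in);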
4833
4834 llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4835 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4836 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4837 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4838 ASTContext &C = CGM.getContext();
4839
4840 // void reduction_func(void *LHSArg, void *RHSArg);
4841 FunctionArgList Args;
4842 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4843 ImplicitParamKind::Other);
4844 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4845 ImplicitParamKind::Other);
4846 Args.push_back(&LHSArg);
4847 Args.push_back(&RHSArg);
4848 const auto &CGFI =
4849 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4850 std::string Name = getReductionFuncName(ReducerName);
4851 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4852 llvm::GlobalValue::InternalLinkage, Name,
4853 &CGM.getModule());
4854 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4855 Fn->setDoesNotRecurse();
4856 CodeGenFunction CGF(CGM);
4857 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4858
4859 // Dst = (void*[n])(LHSArg);
4860 // Src = (void*[n])(RHSArg);
4861 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4862 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4863 CGF.Builder.getPtrTy(0)),
4864 ArgsElemType, CGF.getPointerAlign());
4865 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4866 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4867 CGF.Builder.getPtrTy(0)),
4868 ArgsElemType, CGF.getPointerAlign());
4869
4870 // ...
4871 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4872 // ...
4873 CodeGenFunction::OMPPrivateScope Scope(CGF);
4874 const auto *IPriv = Privates.begin();
4875 unsigned Idx = 0;
4876 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4877 const auto *RHSVar =
4878 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4879 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4880 const auto *LHSVar =
4881 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4882 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4883 QualType PrivTy = (*IPriv)->getType();
4884 if (PrivTy->isVariablyModifiedType()) {
4885 // Get array size and emit VLA type.
4886 ++Idx;
4887 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4888 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4889 const VariableArrayType *VLA =
4890 CGF.getContext().getAsVariableArrayType(PrivTy);
4891 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4892 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4893 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4894 CGF.EmitVariablyModifiedType(PrivTy);
4895 }
4896 }
4897 Scope.Privatize();
4898 IPriv = Privates.begin();
4899 const auto *ILHS = LHSExprs.begin();
4900 const auto *IRHS = RHSExprs.begin();
4901 for (const Expr *E : ReductionOps) {
4902 if ((*IPriv)->getType()->isArrayType()) {
4903 // Emit reduction for array section.
4904 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4905 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4906 EmitOMPAggregateReduction(
4907 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4908 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4909 emitReductionCombiner(CGF, E);
4910 });
4911 } else {
4912 // Emit reduction for array subscript or single variable.
4913 emitReductionCombiner(CGF, E);
4914 }
4915 ++IPriv;
4916 ++ILHS;
4917 ++IRHS;
4918 }
4919 Scope.ForceCleanup();
4920 CGF.FinishFunction();
4921 return Fn;
4922}
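// Illustrative sketch of the helper generated above for reduction(+ : a, b),
// with assumed element types:
//   void <reducer>.red(void **lhs, void **rhs) {
//     *(int *)lhs[0] = *(int *)lhs[0] + *(int *)rhs[0];
//     *(double *)lhs[1] = *(double *)lhs[1] + *(double *)rhs[1];
//   }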
4923
4924 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4925 const Expr *ReductionOp,
4926 const Expr *PrivateRef,
4927 const DeclRefExpr *LHS,
4928 const DeclRefExpr *RHS) {
4929 if (PrivateRef->getType()->isArrayType()) {
4930 // Emit reduction for array section.
4931 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4932 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4933 EmitOMPAggregateReduction(
4934 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4935 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4936 emitReductionCombiner(CGF, ReductionOp);
4937 });
4938 } else {
4939 // Emit reduction for array subscript or single variable.
4940 emitReductionCombiner(CGF, ReductionOp);
4941 }
4942}
4943
4944static std::string generateUniqueName(CodeGenModule &CGM,
4945 llvm::StringRef Prefix, const Expr *Ref);
4946
4947 void CGOpenMPRuntime::emitPrivateReduction(
4948 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4949 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4950
4951 // Create a shared global variable (__shared_reduction_var) to accumulate the
4952 // final result.
4953 //
4954 // Call __kmpc_barrier to synchronize threads before initialization.
4955 //
4956 // The master thread (thread_id == 0) initializes __shared_reduction_var
4957 // with the identity value or initializer.
4958 //
4959 // Call __kmpc_barrier to synchronize before combining.
4960 // For each i:
4961 // - Thread enters critical section.
4962 // - Reads its private value from LHSExprs[i].
4963 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4964 // Privates[i]).
4965 // - Exits critical section.
4966 //
4967 // Call __kmpc_barrier after combining.
4968 //
4969 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4970 //
4971 // Final __kmpc_barrier to synchronize after broadcasting.
4972 QualType PrivateType = Privates->getType();
4973 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4974
4975 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
4976 std::string ReductionVarNameStr;
4977 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
4978 ReductionVarNameStr =
4979 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4980 else
4981 ReductionVarNameStr = "unnamed_priv_var";
4982
4983 // Create an internal shared variable
4984 std::string SharedName =
4985 CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
4986 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4987 LLVMType, ".omp.reduction." + SharedName);
4988
4989 SharedVar->setAlignment(
4990 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4991
4992 Address SharedResult =
4993 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
4994
4995 llvm::Value *ThreadId = getThreadID(CGF, Loc);
4996 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4997 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
4998
4999 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
5000 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
5001
5002 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
5003 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
5004 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
5005
5006 CGF.EmitBlock(InitBB);
5007
5008 auto EmitSharedInit = [&]() {
5009 if (UDR) { // Check if it's a User-Defined Reduction
5010 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5011 std::pair<llvm::Function *, llvm::Function *> FnPair =
5012 getUserDefinedReduction(UDR);
5013 llvm::Function *InitializerFn = FnPair.second;
5014 if (InitializerFn) {
5015 if (const auto *CE =
5016 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
5017 const auto *OutDRE = cast<DeclRefExpr>(
5018 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5019 ->getSubExpr());
5020 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5021
5022 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5023 LocalScope.addPrivate(OutVD, SharedResult);
5024
5025 (void)LocalScope.Privatize();
5026 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5027 CE->getCallee()->IgnoreParenImpCasts())) {
5028 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5029 CGF, OVE, RValue::get(InitializerFn));
5030 CGF.EmitIgnoredExpr(CE);
5031 } else {
5032 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5033 PrivateType.getQualifiers(),
5034 /*IsInitializer=*/true);
5035 }
5036 } else {
5037 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5038 PrivateType.getQualifiers(),
5039 /*IsInitializer=*/true);
5040 }
5041 } else {
5042 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5043 PrivateType.getQualifiers(),
5044 /*IsInitializer=*/true);
5045 }
5046 } else {
5047 // EmitNullInitialization handles default construction for C++ classes
5048 // and zeroing for scalars, which is a reasonable default.
5049 CGF.EmitNullInitialization(SharedResult, PrivateType);
5050 }
5051 return; // UDR initialization handled
5052 }
5053 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5054 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5055 if (const Expr *InitExpr = VD->getInit()) {
5056 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5057 PrivateType.getQualifiers(), true);
5058 return;
5059 }
5060 }
5061 }
5062 CGF.EmitNullInitialization(SharedResult, PrivateType);
5063 };
5064 EmitSharedInit();
5065 CGF.Builder.CreateBr(InitEndBB);
5066 CGF.EmitBlock(InitEndBB);
5067
5068 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5069 CGM.getModule(), OMPRTL___kmpc_barrier),
5070 BarrierArgs);
5071
5072 const Expr *ReductionOp = ReductionOps;
5073 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5074 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5075 LValue LHSLV = CGF.EmitLValue(Privates);
5076
5077 auto EmitCriticalReduction = [&](auto ReductionGen) {
5078 std::string CriticalName = getName({"reduction_critical"});
5079 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5080 };
5081
5082 if (CurrentUDR) {
5083 // Handle user-defined reduction.
5084 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5085 Action.Enter(CGF);
5086 std::pair<llvm::Function *, llvm::Function *> FnPair =
5087 getUserDefinedReduction(CurrentUDR);
5088 if (FnPair.first) {
5089 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5090 const auto *OutDRE = cast<DeclRefExpr>(
5091 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5092 ->getSubExpr());
5093 const auto *InDRE = cast<DeclRefExpr>(
5094 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5095 ->getSubExpr());
5096 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5097 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5098 SharedLV.getAddress());
5099 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5100 LHSLV.getAddress());
5101 (void)LocalScope.Privatize();
5102 emitReductionCombiner(CGF, ReductionOp);
5103 }
5104 }
5105 };
5106 EmitCriticalReduction(ReductionGen);
5107 } else {
5108 // Handle built-in reduction operations.
5109#ifndef NDEBUG
5110 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5111 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5112 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5113
5114 const Expr *AssignRHS = nullptr;
5115 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5116 if (BinOp->getOpcode() == BO_Assign)
5117 AssignRHS = BinOp->getRHS();
5118 } else if (const auto *OpCall =
5119 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5120 if (OpCall->getOperator() == OO_Equal)
5121 AssignRHS = OpCall->getArg(1);
5122 }
5123
5124 assert(AssignRHS &&
5125 "Private Variable Reduction : Invalid ReductionOp expression");
5126#endif
5127
5128 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5129 Action.Enter(CGF);
5130 const auto *OmpOutDRE =
5131 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5132 const auto *OmpInDRE =
5133 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5134 assert(
5135 OmpOutDRE && OmpInDRE &&
5136 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5137 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5138 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5139 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5140 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5141 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5142 (void)LocalScope.Privatize();
5143 // Emit the actual reduction operation
5144 CGF.EmitIgnoredExpr(ReductionOp);
5145 };
5146 EmitCriticalReduction(ReductionGen);
5147 }
5148
5149 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5150 CGM.getModule(), OMPRTL___kmpc_barrier),
5151 BarrierArgs);
5152
5153 // Broadcast final result
5154 bool IsAggregate = PrivateType->isAggregateType();
5155 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5156 llvm::Value *FinalResultVal = nullptr;
5157 Address FinalResultAddr = Address::invalid();
5158
5159 if (IsAggregate)
5160 FinalResultAddr = SharedResult;
5161 else
5162 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5163
5164 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5165 if (IsAggregate) {
5166 CGF.EmitAggregateCopy(TargetLHSLV,
5167 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5168 PrivateType, AggValueSlot::DoesNotOverlap, false);
5169 } else {
5170 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5171 }
5172 // Final synchronization barrier
5173 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5174 CGM.getModule(), OMPRTL___kmpc_barrier),
5175 BarrierArgs);
5176
5177 // Combiner with original list item
5178 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5179 PrePostActionTy &Action) {
5180 Action.Enter(CGF);
5181 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5182 cast<DeclRefExpr>(LHSExprs),
5183 cast<DeclRefExpr>(RHSExprs));
5184 };
5185 EmitCriticalReduction(OriginalListCombiner);
5186}
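// Illustrative summary of the synchronization emitted above:
//   if (gtid == 0) shared = <init>;              // one thread initializes
//   __kmpc_barrier(...);
//   critical { shared = RedOp(shared, priv); }   // once per thread
//   __kmpc_barrier(...);
//   rhs = shared;                                // broadcast back
//   __kmpc_barrier(...);
//   critical { original = RedOp(original, rhs); }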
5187
5188 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5189 ArrayRef<const Expr *> OrgPrivates,
5190 ArrayRef<const Expr *> OrgLHSExprs,
5191 ArrayRef<const Expr *> OrgRHSExprs,
5192 ArrayRef<const Expr *> OrgReductionOps,
5193 ReductionOptionsTy Options) {
5194 if (!CGF.HaveInsertPoint())
5195 return;
5196
5197 bool WithNowait = Options.WithNowait;
5198 bool SimpleReduction = Options.SimpleReduction;
5199
5200 // Next code should be emitted for reduction:
5201 //
5202 // static kmp_critical_name lock = { 0 };
5203 //
5204 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5205 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5206 // ...
5207 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5208 // *(Type<n>-1*)rhs[<n>-1]);
5209 // }
5210 //
5211 // ...
5212 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5213 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5214 // RedList, reduce_func, &<lock>)) {
5215 // case 1:
5216 // ...
5217 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5218 // ...
5219 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5220 // break;
5221 // case 2:
5222 // ...
5223 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5224 // ...
5225 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5226 // break;
5227 // default:;
5228 // }
5229 //
5230 // if SimpleReduction is true, only the next code is generated:
5231 // ...
5232 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5233 // ...
5234
5235 ASTContext &C = CGM.getContext();
5236
5237 if (SimpleReduction) {
5238 CodeGenFunction::RunCleanupsScope Scope(CGF);
5239 const auto *IPriv = OrgPrivates.begin();
5240 const auto *ILHS = OrgLHSExprs.begin();
5241 const auto *IRHS = OrgRHSExprs.begin();
5242 for (const Expr *E : OrgReductionOps) {
5243 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5244 cast<DeclRefExpr>(*IRHS));
5245 ++IPriv;
5246 ++ILHS;
5247 ++IRHS;
5248 }
5249 return;
5250 }
5251
5252 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5253 // Only keep entries where the corresponding variable is not private.
5254 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5255 FilteredRHSExprs, FilteredReductionOps;
5256 for (unsigned I : llvm::seq<unsigned>(
5257 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5258 if (!Options.IsPrivateVarReduction[I]) {
5259 FilteredPrivates.emplace_back(OrgPrivates[I]);
5260 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5261 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5262 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5263 }
5264 }
5265 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5266 // processing.
5267 ArrayRef<const Expr *> Privates = FilteredPrivates;
5268 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5269 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5270 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5271
5272 // 1. Build a list of reduction variables.
5273 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5274 auto Size = RHSExprs.size();
5275 for (const Expr *E : Privates) {
5276 if (E->getType()->isVariablyModifiedType())
5277 // Reserve place for array size.
5278 ++Size;
5279 }
5280 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5281 QualType ReductionArrayTy = C.getConstantArrayType(
5282 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5283 /*IndexTypeQuals=*/0);
5284 RawAddress ReductionList =
5285 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5286 const auto *IPriv = Privates.begin();
5287 unsigned Idx = 0;
5288 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5289 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5290 CGF.Builder.CreateStore(
5291 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5292 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5293 Elem);
5294 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5295 // Store array size.
5296 ++Idx;
5297 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5298 llvm::Value *Size = CGF.Builder.CreateIntCast(
5299 CGF.getVLASize(
5300 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5301 .NumElts,
5302 CGF.SizeTy, /*isSigned=*/false);
5303 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5304 Elem);
5305 }
5306 }
5307
5308 // 2. Emit reduce_func().
5309 llvm::Function *ReductionFn = emitReductionFunction(
5310 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5311 Privates, LHSExprs, RHSExprs, ReductionOps);
5312
5313 // 3. Create static kmp_critical_name lock = { 0 };
5314 std::string Name = getName({"reduction"});
5315 llvm::Value *Lock = getCriticalRegionLock(Name);
5316
5317 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5318 // RedList, reduce_func, &<lock>);
5319 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5320 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5321 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5322 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5323 ReductionList.getPointer(), CGF.VoidPtrTy);
5324 llvm::Value *Args[] = {
5325 IdentTLoc, // ident_t *<loc>
5326 ThreadId, // i32 <gtid>
5327 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5328 ReductionArrayTySize, // size_type sizeof(RedList)
5329 RL, // void *RedList
5330 ReductionFn, // void (*) (void *, void *) <reduce_func>
5331 Lock // kmp_critical_name *&<lock>
5332 };
5333 llvm::Value *Res = CGF.EmitRuntimeCall(
5334 OMPBuilder.getOrCreateRuntimeFunction(
5335 CGM.getModule(),
5336 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5337 Args);
5338
5339 // 5. Build switch(res)
5340 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5341 llvm::SwitchInst *SwInst =
5342 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5343
5344 // 6. Build case 1:
5345 // ...
5346 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5347 // ...
5348 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5349 // break;
5350 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5351 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5352 CGF.EmitBlock(Case1BB);
5353
5354 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5355 llvm::Value *EndArgs[] = {
5356 IdentTLoc, // ident_t *<loc>
5357 ThreadId, // i32 <gtid>
5358 Lock // kmp_critical_name *&<lock>
5359 };
5360 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5361 CodeGenFunction &CGF, PrePostActionTy &Action) {
5362 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5363 const auto *IPriv = Privates.begin();
5364 const auto *ILHS = LHSExprs.begin();
5365 const auto *IRHS = RHSExprs.begin();
5366 for (const Expr *E : ReductionOps) {
5367 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5368 cast<DeclRefExpr>(*IRHS));
5369 ++IPriv;
5370 ++ILHS;
5371 ++IRHS;
5372 }
5373 };
5374 RegionCodeGenTy RCG(CodeGen);
5375 CommonActionTy Action(
5376 nullptr, {},
5377 OMPBuilder.getOrCreateRuntimeFunction(
5378 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5379 : OMPRTL___kmpc_end_reduce),
5380 EndArgs);
5381 RCG.setAction(Action);
5382 RCG(CGF);
5383
5384 CGF.EmitBranch(DefaultBB);
5385
5386 // 7. Build case 2:
5387 // ...
5388 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5389 // ...
5390 // break;
5391 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5392 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5393 CGF.EmitBlock(Case2BB);
5394
5395 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5396 CodeGenFunction &CGF, PrePostActionTy &Action) {
5397 const auto *ILHS = LHSExprs.begin();
5398 const auto *IRHS = RHSExprs.begin();
5399 const auto *IPriv = Privates.begin();
5400 for (const Expr *E : ReductionOps) {
5401 const Expr *XExpr = nullptr;
5402 const Expr *EExpr = nullptr;
5403 const Expr *UpExpr = nullptr;
5404 BinaryOperatorKind BO = BO_Comma;
5405 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5406 if (BO->getOpcode() == BO_Assign) {
5407 XExpr = BO->getLHS();
5408 UpExpr = BO->getRHS();
5409 }
5410 }
5411 // Try to emit update expression as a simple atomic.
5412 const Expr *RHSExpr = UpExpr;
5413 if (RHSExpr) {
5414 // Analyze RHS part of the whole expression.
5415 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5416 RHSExpr->IgnoreParenImpCasts())) {
5417 // If this is a conditional operator, analyze its condition for
5418 // min/max reduction operator.
5419 RHSExpr = ACO->getCond();
5420 }
5421 if (const auto *BORHS =
5422 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5423 EExpr = BORHS->getRHS();
5424 BO = BORHS->getOpcode();
5425 }
5426 }
5427 if (XExpr) {
5428 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5429 auto &&AtomicRedGen = [BO, VD,
5430 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5431 const Expr *EExpr, const Expr *UpExpr) {
5432 LValue X = CGF.EmitLValue(XExpr);
5433 RValue E;
5434 if (EExpr)
5435 E = CGF.EmitAnyExpr(EExpr);
5436 CGF.EmitOMPAtomicSimpleUpdateExpr(
5437 X, E, BO, /*IsXLHSInRHSPart=*/true,
5438 llvm::AtomicOrdering::Monotonic, Loc,
5439 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5440 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5441 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5442 CGF.emitOMPSimpleStore(
5443 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5444 VD->getType().getNonReferenceType(), Loc);
5445 PrivateScope.addPrivate(VD, LHSTemp);
5446 (void)PrivateScope.Privatize();
5447 return CGF.EmitAnyExpr(UpExpr);
5448 });
5449 };
5450 if ((*IPriv)->getType()->isArrayType()) {
5451 // Emit atomic reduction for array section.
5452 const auto *RHSVar =
5453 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5454 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5455 AtomicRedGen, XExpr, EExpr, UpExpr);
5456 } else {
5457 // Emit atomic reduction for array subscript or single variable.
5458 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5459 }
5460 } else {
5461 // Emit as a critical region.
5462 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5463 const Expr *, const Expr *) {
5464 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5465 std::string Name = RT.getName({"atomic_reduction"});
5466 RT.emitCriticalRegion(
5467 CGF, Name,
5468 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5469 Action.Enter(CGF);
5470 emitReductionCombiner(CGF, E);
5471 },
5472 Loc);
5473 };
5474 if ((*IPriv)->getType()->isArrayType()) {
5475 const auto *LHSVar =
5476 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5477 const auto *RHSVar =
5478 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5479 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5480 CritRedGen);
5481 } else {
5482 CritRedGen(CGF, nullptr, nullptr, nullptr);
5483 }
5484 }
5485 ++ILHS;
5486 ++IRHS;
5487 ++IPriv;
5488 }
5489 };
5490 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5491 if (!WithNowait) {
5492 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5493 llvm::Value *EndArgs[] = {
5494 IdentTLoc, // ident_t *<loc>
5495 ThreadId, // i32 <gtid>
5496 Lock // kmp_critical_name *&<lock>
5497 };
5498 CommonActionTy Action(nullptr, {},
5499 OMPBuilder.getOrCreateRuntimeFunction(
5500 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5501 EndArgs);
5502 AtomicRCG.setAction(Action);
5503 AtomicRCG(CGF);
5504 } else {
5505 AtomicRCG(CGF);
5506 }
5507
5508 CGF.EmitBranch(DefaultBB);
5509 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5510 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5511 "PrivateVarReduction: Privates size mismatch");
5512 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5513 "PrivateVarReduction: ReductionOps size mismatch");
5514 for (unsigned I : llvm::seq<unsigned>(
5515 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5516 if (Options.IsPrivateVarReduction[I])
5517 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5518 OrgRHSExprs[I], OrgReductionOps[I]);
5519 }
5520}
5521
5522/// Generates unique name for artificial threadprivate variables.
5523/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5524static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5525 const Expr *Ref) {
5526 SmallString<256> Buffer;
5527 llvm::raw_svector_ostream Out(Buffer);
5528 const clang::DeclRefExpr *DE;
5529 const VarDecl *D = ::getBaseDecl(Ref, DE);
5530 if (!D)
5531 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5532 D = D->getCanonicalDecl();
5533 std::string Name = CGM.getOpenMPRuntime().getName(
5534 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5535 Out << Prefix << Name << "_"
5536 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5537 return std::string(Out.str());
5538}
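// Example (illustrative): for Prefix "reduction_size" and a local variable "x"
// whose declaration starts at raw source-location encoding 1234, this produces
// a name of the shape "reduction_size<sep>x_1234", where <sep> depends on the
// runtime's getName separator.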
5539
5540/// Emits reduction initializer function:
5541/// \code
5542/// void @.red_init(void* %arg, void* %orig) {
5543/// %0 = bitcast void* %arg to <type>*
5544/// store <type> <init>, <type>* %0
5545/// ret void
5546/// }
5547/// \endcode
5548static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5549 SourceLocation Loc,
5550 ReductionCodeGen &RCG, unsigned N) {
5551 ASTContext &C = CGM.getContext();
5552 QualType VoidPtrTy = C.VoidPtrTy;
5553 VoidPtrTy.addRestrict();
5554 FunctionArgList Args;
5555 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5556 ImplicitParamKind::Other);
5557 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5558 ImplicitParamKind::Other);
5559 Args.emplace_back(&Param);
5560 Args.emplace_back(&ParamOrig);
5561 const auto &FnInfo =
5562 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5563 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5564 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5565 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5566 Name, &CGM.getModule());
5567 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5568 Fn->setDoesNotRecurse();
5569 CodeGenFunction CGF(CGM);
5570 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5571 QualType PrivateType = RCG.getPrivateType(N);
5572 Address PrivateAddr = CGF.EmitLoadOfPointer(
5573 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5574 C.getPointerType(PrivateType)->castAs<PointerType>());
5575 llvm::Value *Size = nullptr;
5576 // If the size of the reduction item is non-constant, load it from the global
5577 // threadprivate variable.
5578 if (RCG.getSizes(N).second) {
5579 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5580 CGF, CGM.getContext().getSizeType(),
5581 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5582 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5583 CGM.getContext().getSizeType(), Loc);
5584 }
5585 RCG.emitAggregateType(CGF, N, Size);
5586 Address OrigAddr = Address::invalid();
5587 // If the initializer uses the initializer from a declare reduction construct,
5588 // emit a pointer to the address of the original reduction item (required by
5589 // the reduction initializer).
5590 if (RCG.usesReductionInitializer(N)) {
5591 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5592 OrigAddr = CGF.EmitLoadOfPointer(
5593 SharedAddr,
5594 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5595 }
5596 // Emit the initializer:
5597 // %0 = bitcast void* %arg to <type>*
5598 // store <type> <init>, <type>* %0
5599 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5600 [](CodeGenFunction &) { return false; });
5601 CGF.FinishFunction();
5602 return Fn;
5603}
5604
5605/// Emits reduction combiner function:
5606/// \code
5607/// void @.red_comb(void* %arg0, void* %arg1) {
5608/// %lhs = bitcast void* %arg0 to <type>*
5609/// %rhs = bitcast void* %arg1 to <type>*
5610/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5611/// store <type> %2, <type>* %lhs
5612/// ret void
5613/// }
5614/// \endcode
5615static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5616 SourceLocation Loc,
5617 ReductionCodeGen &RCG, unsigned N,
5618 const Expr *ReductionOp,
5619 const Expr *LHS, const Expr *RHS,
5620 const Expr *PrivateRef) {
5621 ASTContext &C = CGM.getContext();
5622 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5623 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5624 FunctionArgList Args;
5625 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5626 C.VoidPtrTy, ImplicitParamKind::Other);
5627 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5628 ImplicitParamKind::Other);
5629 Args.emplace_back(&ParamInOut);
5630 Args.emplace_back(&ParamIn);
5631 const auto &FnInfo =
5632 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5633 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5634 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5635 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5636 Name, &CGM.getModule());
5637 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5638 Fn->setDoesNotRecurse();
5639 CodeGenFunction CGF(CGM);
5640 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5641 llvm::Value *Size = nullptr;
5642 // If the size of the reduction item is non-constant, load it from the global
5643 // threadprivate variable.
5644 if (RCG.getSizes(N).second) {
5645 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5646 CGF, CGM.getContext().getSizeType(),
5647 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5648 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5649 CGM.getContext().getSizeType(), Loc);
5650 }
5651 RCG.emitAggregateType(CGF, N, Size);
5652 // Remap lhs and rhs variables to the addresses of the function arguments.
5653 // %lhs = bitcast void* %arg0 to <type>*
5654 // %rhs = bitcast void* %arg1 to <type>*
5655 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5656 PrivateScope.addPrivate(
5657 LHSVD,
5658 // Pull out the pointer to the variable.
5659 CGF.EmitLoadOfPointer(
5660 CGF.GetAddrOfLocalVar(&ParamInOut)
5661 .withElementType(CGF.Builder.getPtrTy(0)),
5662 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5663 PrivateScope.addPrivate(
5664 RHSVD,
5665 // Pull out the pointer to the variable.
5666 CGF.EmitLoadOfPointer(
5667 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5668 CGF.Builder.getPtrTy(0)),
5669 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5670 PrivateScope.Privatize();
5671 // Emit the combiner body:
5672 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5673 // store <type> %2, <type>* %lhs
5674 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5675 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5676 cast<DeclRefExpr>(RHS));
5677 CGF.FinishFunction();
5678 return Fn;
5679}
5680
5681/// Emits reduction finalizer function:
5682/// \code
5683/// void @.red_fini(void* %arg) {
5684/// %0 = bitcast void* %arg to <type>*
5685/// <destroy>(<type>* %0)
5686/// ret void
5687/// }
5688/// \endcode
5689static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5690 SourceLocation Loc,
5691 ReductionCodeGen &RCG, unsigned N) {
5692 if (!RCG.needCleanups(N))
5693 return nullptr;
5694 ASTContext &C = CGM.getContext();
5695 FunctionArgList Args;
5696 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5697 ImplicitParamKind::Other);
5698 Args.emplace_back(&Param);
5699 const auto &FnInfo =
5700 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5701 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5702 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5703 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5704 Name, &CGM.getModule());
5705 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5706 Fn->setDoesNotRecurse();
5707 CodeGenFunction CGF(CGM);
5708 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5709 Address PrivateAddr = CGF.EmitLoadOfPointer(
5710 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5711 llvm::Value *Size = nullptr;
5712 // If the size of the reduction item is non-constant, load it from the global
5713 // threadprivate variable.
5714 if (RCG.getSizes(N).second) {
5715 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5716 CGF, CGM.getContext().getSizeType(),
5717 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5718 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5719 CGM.getContext().getSizeType(), Loc);
5720 }
5721 RCG.emitAggregateType(CGF, N, Size);
5722 // Emit the finalizer body:
5723 // <destroy>(<type>* %0)
5724 RCG.emitCleanups(CGF, N, PrivateAddr);
5725 CGF.FinishFunction(Loc);
5726 return Fn;
5727}
5728
5729 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5730 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5731 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5732 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5733 return nullptr;
5734
5735 // Build typedef struct:
5736 // kmp_taskred_input {
5737 // void *reduce_shar; // shared reduction item
5738 // void *reduce_orig; // original reduction item used for initialization
5739 // size_t reduce_size; // size of data item
5740 // void *reduce_init; // data initialization routine
5741 // void *reduce_fini; // data finalization routine
5742 // void *reduce_comb; // data combiner routine
5743 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5744 // } kmp_taskred_input_t;
5745 ASTContext &C = CGM.getContext();
5746 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5747 RD->startDefinition();
5748 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5749 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5750 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5751 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5752 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5753 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5754 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5755 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5756 RD->completeDefinition();
5757 CanQualType RDType = C.getCanonicalTagType(RD);
5758 unsigned Size = Data.ReductionVars.size();
5759 llvm::APInt ArraySize(/*numBits=*/64, Size);
5760 QualType ArrayRDType =
5761 C.getConstantArrayType(RDType, ArraySize, nullptr,
5762 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5763 // kmp_task_red_input_t .rd_input.[Size];
5764 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5765 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5766 Data.ReductionCopies, Data.ReductionOps);
5767 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5768 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5769 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5770 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5771 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5772 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5773 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5774 ".rd_input.gep.");
5775 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5776 // ElemLVal.reduce_shar = &Shareds[Cnt];
5777 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5778 RCG.emitSharedOrigLValue(CGF, Cnt);
5779 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5780 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5781 // ElemLVal.reduce_orig = &Origs[Cnt];
5782 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5783 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5784 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5785 RCG.emitAggregateType(CGF, Cnt);
5786 llvm::Value *SizeValInChars;
5787 llvm::Value *SizeVal;
5788 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5789 // We use delayed creation/initialization for VLAs and array sections. It is
5790 // required because the runtime does not provide a way to pass the sizes of
5791 // VLAs/array sections to the initializer/combiner/finalizer functions.
5792 // Instead, threadprivate global variables are used to store these values,
5793 // which those functions then read.
5794 bool DelayedCreation = !!SizeVal;
5795 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5796 /*isSigned=*/false);
5797 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5798 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5799 // ElemLVal.reduce_init = init;
5800 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5801 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5802 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5803 // ElemLVal.reduce_fini = fini;
5804 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5805 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5806 llvm::Value *FiniAddr =
5807 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5808 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5809 // ElemLVal.reduce_comb = comb;
5810 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5811 llvm::Value *CombAddr = emitReduceCombFunction(
5812 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5813 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5814 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5815 // ElemLVal.flags = 0;
5816 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5817 if (DelayedCreation) {
5818 CGF.EmitStoreOfScalar(
5819 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5820 FlagsLVal);
5821 } else
5822 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5823 }
5824 if (Data.IsReductionWithTaskMod) {
5825 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5826 // is_ws, int num, void *data);
5827 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5828 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5829 CGM.IntTy, /*isSigned=*/true);
5830 llvm::Value *Args[] = {
5831 IdentTLoc, GTid,
5832 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5833 /*isSigned=*/true),
5834 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5835 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5836 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5837 return CGF.EmitRuntimeCall(
5838 OMPBuilder.getOrCreateRuntimeFunction(
5839 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5840 Args);
5841 }
5842 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5843 llvm::Value *Args[] = {
5844 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5845 /*isSigned=*/true),
5846 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5847 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5848 CGM.VoidPtrTy)};
5849 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5850 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5851 Args);
5852}
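// Illustrative sketch: for
//   #pragma omp taskgroup task_reduction(+ : x)
// the loop above fills a single kmp_taskred_input_t element, roughly
//   { &x, &x, sizeof(x), .red_init., .red_fini. or nullptr, .red_comb., 0 }
// and then emits
//   void *tg = __kmpc_taskred_init(gtid, /*num_data=*/1, rd_input);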
5853
5854 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5855 SourceLocation Loc,
5856 bool IsWorksharingReduction) {
5857 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5858 // is_ws, int num, void *data);
5859 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5860 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5861 CGM.IntTy, /*isSigned=*/true);
5862 llvm::Value *Args[] = {IdentTLoc, GTid,
5863 llvm::ConstantInt::get(CGM.IntTy,
5864 IsWorksharingReduction ? 1 : 0,
5865 /*isSigned=*/true)};
5866 (void)CGF.EmitRuntimeCall(
5867 OMPBuilder.getOrCreateRuntimeFunction(
5868 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5869 Args);
5870}
5871
5872 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5873 SourceLocation Loc,
5874 ReductionCodeGen &RCG,
5875 unsigned N) {
5876 auto Sizes = RCG.getSizes(N);
5877 // Emit a threadprivate global variable if the size is non-constant
5878 // (Sizes.second != nullptr).
5879 if (Sizes.second) {
5880 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5881 /*isSigned=*/false);
5882 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5883 CGF, CGM.getContext().getSizeType(),
5884 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5885 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5886 }
5887}
5888
5889 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5890 SourceLocation Loc,
5891 llvm::Value *ReductionsPtr,
5892 LValue SharedLVal) {
5893 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5894 // *d);
5895 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5896 CGM.IntTy,
5897 /*isSigned=*/true),
5898 ReductionsPtr,
5899 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5900 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5901 return Address(
5902 CGF.EmitRuntimeCall(
5903 OMPBuilder.getOrCreateRuntimeFunction(
5904 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5905 Args),
5906 CGF.Int8Ty, SharedLVal.getAlignment());
5907}
5908
5909void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5910 const OMPTaskDataTy &Data) {
5911 if (!CGF.HaveInsertPoint())
5912 return;
5913
5914 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5915 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5916 OMPBuilder.createTaskwait(CGF.Builder);
5917 } else {
5918 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5919 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5920 auto &M = CGM.getModule();
5921 Address DependenciesArray = Address::invalid();
5922 llvm::Value *NumOfElements;
5923 std::tie(NumOfElements, DependenciesArray) =
5924 emitDependClause(CGF, Data.Dependences, Loc);
5925 if (!Data.Dependences.empty()) {
5926 llvm::Value *DepWaitTaskArgs[7];
5927 DepWaitTaskArgs[0] = UpLoc;
5928 DepWaitTaskArgs[1] = ThreadID;
5929 DepWaitTaskArgs[2] = NumOfElements;
5930 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5931 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5932 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5933 DepWaitTaskArgs[6] =
5934 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5935
5936 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5937
5938 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5939 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5940 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5941 // kmp_int32 has_no_wait); if dependence info is specified.
5942 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5943 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5944 DepWaitTaskArgs);
5945
5946 } else {
5947
5948 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5949 // global_tid);
5950 llvm::Value *Args[] = {UpLoc, ThreadID};
5951 // Ignore return result until untied tasks are supported.
5952 CGF.EmitRuntimeCall(
5953 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5954 Args);
5955 }
5956 }
5957
5958 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5959 Region->emitUntiedSwitch(CGF);
5960}
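//
// For illustration: a bare '#pragma omp taskwait' lowers to
// __kmpc_omp_taskwait(loc, gtid), while '#pragma omp taskwait depend(in : x)'
// first materializes the dependence array and instead emits, roughly,
//
//   __kmpc_omp_taskwait_deps_51(loc, gtid, /*ndeps=*/1, deps,
//                               /*ndeps_noalias=*/0, nullptr, has_nowait);
//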
5961
5962void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5963 OpenMPDirectiveKind InnerKind,
5964 const RegionCodeGenTy &CodeGen,
5965 bool HasCancel) {
5966 if (!CGF.HaveInsertPoint())
5967 return;
5968 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5969 InnerKind != OMPD_critical &&
5970 InnerKind != OMPD_master &&
5971 InnerKind != OMPD_masked);
5972 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5973}
5974
5975namespace {
5976enum RTCancelKind {
5977 CancelNoreq = 0,
5978 CancelParallel = 1,
5979 CancelLoop = 2,
5980 CancelSections = 3,
5981 CancelTaskgroup = 4
5982};
5983} // anonymous namespace
5984
5985static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5986 RTCancelKind CancelKind = CancelNoreq;
5987 if (CancelRegion == OMPD_parallel)
5988 CancelKind = CancelParallel;
5989 else if (CancelRegion == OMPD_for)
5990 CancelKind = CancelLoop;
5991 else if (CancelRegion == OMPD_sections)
5992 CancelKind = CancelSections;
5993 else {
5994 assert(CancelRegion == OMPD_taskgroup);
5995 CancelKind = CancelTaskgroup;
5996 }
5997 return CancelKind;
5998}
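//
// For illustration: '#pragma omp cancel for' maps to CancelLoop (2) and
// '#pragma omp cancel taskgroup' to CancelTaskgroup (4); the resulting value
// is passed as the cncl_kind argument of __kmpc_cancel and
// __kmpc_cancellationpoint.
//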
5999
6000void CGOpenMPRuntime::emitCancellationPointCall(
6001 CodeGenFunction &CGF, SourceLocation Loc,
6002 OpenMPDirectiveKind CancelRegion) {
6003 if (!CGF.HaveInsertPoint())
6004 return;
6005 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6006 // global_tid, kmp_int32 cncl_kind);
6007 if (auto *OMPRegionInfo =
6008 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6009 // For 'cancellation point taskgroup', the task region info may not have a
6010 // cancel. This may instead happen in another adjacent task.
6011 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6012 llvm::Value *Args[] = {
6013 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6014 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6015 // Ignore return result until untied tasks are supported.
6016 llvm::Value *Result = CGF.EmitRuntimeCall(
6017 OMPBuilder.getOrCreateRuntimeFunction(
6018 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6019 Args);
6020 // if (__kmpc_cancellationpoint()) {
6021 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6022 // exit from construct;
6023 // }
6024 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6025 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6026 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6027 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6028 CGF.EmitBlock(ExitBB);
6029 if (CancelRegion == OMPD_parallel)
6030 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6031 // exit from construct;
6032 CodeGenFunction::JumpDest CancelDest =
6033 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6034 CGF.EmitBranchThroughCleanup(CancelDest);
6035 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6036 }
6037 }
6038}
6039
6040void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6041 const Expr *IfCond,
6042 OpenMPDirectiveKind CancelRegion) {
6043 if (!CGF.HaveInsertPoint())
6044 return;
6045 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6046 // kmp_int32 cncl_kind);
6047 auto &M = CGM.getModule();
6048 if (auto *OMPRegionInfo =
6049 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6050 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6051 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6052 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6053 llvm::Value *Args[] = {
6054 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6055 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6056 // Ignore return result until untied tasks are supported.
6057 llvm::Value *Result = CGF.EmitRuntimeCall(
6058 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6059 // if (__kmpc_cancel()) {
6060 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6061 // exit from construct;
6062 // }
6063 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6064 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6065 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6066 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6067 CGF.EmitBlock(ExitBB);
6068 if (CancelRegion == OMPD_parallel)
6069 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6070 // exit from construct;
6071 CodeGenFunction::JumpDest CancelDest =
6072 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6073 CGF.EmitBranchThroughCleanup(CancelDest);
6074 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6075 };
6076 if (IfCond) {
6077 emitIfClause(CGF, IfCond, ThenGen,
6078 [](CodeGenFunction &, PrePostActionTy &) {});
6079 } else {
6080 RegionCodeGenTy ThenRCG(ThenGen);
6081 ThenRCG(CGF);
6082 }
6083 }
6084}
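//
// For illustration: '#pragma omp cancel parallel if(c)' is emitted roughly as
//
//   if (c)
//     if (__kmpc_cancel(loc, gtid, CancelParallel)) {
//       __kmpc_cancel_barrier(loc, gtid); // parallel cancellation only
//       <branch through cleanups to the region's cancel destination>
//     }
//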
6085
6086namespace {
6087/// Cleanup action for uses_allocators support.
6088class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6089 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6090
6091public:
6092 OMPUsesAllocatorsActionTy(
6093 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6094 : Allocators(Allocators) {}
6095 void Enter(CodeGenFunction &CGF) override {
6096 if (!CGF.HaveInsertPoint())
6097 return;
6098 for (const auto &AllocatorData : Allocators) {
6099 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6100 CGF, AllocatorData.first, AllocatorData.second);
6101 }
6102 }
6103 void Exit(CodeGenFunction &CGF) override {
6104 if (!CGF.HaveInsertPoint())
6105 return;
6106 for (const auto &AllocatorData : Allocators) {
6107 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6108 AllocatorData.first);
6109 }
6110 }
6111};
6112} // namespace
6113
6114void CGOpenMPRuntime::emitTargetOutlinedFunction(
6115 const OMPExecutableDirective &D, StringRef ParentName,
6116 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6117 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6118 assert(!ParentName.empty() && "Invalid target entry parent name!");
6119 HasEmittedTargetRegion = true;
6120 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6121 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6122 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6123 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6124 if (!D.AllocatorTraits)
6125 continue;
6126 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6127 }
6128 }
6129 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6130 CodeGen.setAction(UsesAllocatorAction);
6131 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6132 IsOffloadEntry, CodeGen);
6133}
6134
6135void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6136 const Expr *Allocator,
6137 const Expr *AllocatorTraits) {
6138 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6139 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6140 // Use default memspace handle.
6141 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6142 llvm::Value *NumTraits = llvm::ConstantInt::get(
6143 CGF.IntTy, cast<ConstantArrayType>(
6144 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6145 ->getSize()
6146 .getLimitedValue());
6147 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6148 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6149 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6150 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6151 AllocatorTraitsLVal.getBaseInfo(),
6152 AllocatorTraitsLVal.getTBAAInfo());
6153 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6154
6155 llvm::Value *AllocatorVal =
6156 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6157 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6158 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6159 // Store to allocator.
6160 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
6161 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6162 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6163 AllocatorVal =
6164 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6165 Allocator->getType(), Allocator->getExprLoc());
6166 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6167}
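//
// For illustration: for 'uses_allocators(my_alloc(my_traits))' on a target
// construct (names here are placeholders), the code above emits roughly
//
//   my_alloc = __kmpc_init_allocator(gtid, /*memspace=*/nullptr,
//                                    <rank of my_traits>, &my_traits);
//
// with the matching __kmpc_destroy_allocator call emitted on region exit.
//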
6168
6169void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6170 const Expr *Allocator) {
6171 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6172 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6173 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6174 llvm::Value *AllocatorVal =
6175 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6176 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6177 CGF.getContext().VoidPtrTy,
6178 Allocator->getExprLoc());
6179 (void)CGF.EmitRuntimeCall(
6180 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6181 OMPRTL___kmpc_destroy_allocator),
6182 {ThreadId, AllocatorVal});
6183}
6184
6185void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6186 const OMPExecutableDirective &D, CodeGenFunction &CGF,
6187 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6188 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6189 "invalid default attrs structure");
6190 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6191 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6192
6193 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6194 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6195 /*UpperBoundOnly=*/true);
6196
6197 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6198 for (auto *A : C->getAttrs()) {
6199 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6200 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6201 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6202 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6203 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6204 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6205 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6206 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6207 &AttrMaxThreadsVal);
6208 else
6209 continue;
6210
6211 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6212 if (AttrMaxThreadsVal > 0)
6213 MaxThreadsVal = MaxThreadsVal > 0
6214 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6215 : AttrMaxThreadsVal;
6216 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6217 if (AttrMaxBlocksVal > 0)
6218 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6219 : AttrMaxBlocksVal;
6220 }
6221 }
6222}
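//
// For illustration: an 'ompx_attribute' carrying CUDA launch bounds of,
// say, (128, 2) clamps the defaults computed above roughly as
//
//   MaxThreadsVal = MaxThreadsVal > 0 ? std::min(MaxThreadsVal, 128) : 128;
//
// while the attribute minima can only raise Attrs.MinThreads/Attrs.MinTeams.
//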
6223
6224void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6225 const OMPExecutableDirective &D, StringRef ParentName,
6226 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6227 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6228
6229 llvm::TargetRegionEntryInfo EntryInfo =
6230 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6231
6232 CodeGenFunction CGF(CGM, true);
6233 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6234 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6235 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6236
6237 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6238 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6239 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6240 };
6241
6242 cantFail(OMPBuilder.emitTargetRegionFunction(
6243 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6244 OutlinedFnID));
6245
6246 if (!OutlinedFn)
6247 return;
6248
6249 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6250
6251 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6252 for (auto *A : C->getAttrs()) {
6253 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6254 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6255 }
6256 }
6257}
6258
6259/// Checks if the expression is constant or does not have non-trivial function
6260/// calls.
6261static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6262 // We can skip constant expressions.
6263 // We can skip expressions with trivial calls or simple expressions.
6264 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6265 !E->hasNonTrivialCall(Ctx)) &&
6266 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6267}
6268
6269const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6270 const Stmt *Body) {
6271 const Stmt *Child = Body->IgnoreContainers();
6272 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6273 Child = nullptr;
6274 for (const Stmt *S : C->body()) {
6275 if (const auto *E = dyn_cast<Expr>(S)) {
6276 if (isTrivial(Ctx, E))
6277 continue;
6278 }
6279 // Some of the statements can be ignored.
6280 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6281 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6282 continue;
6283 // Analyze declarations.
6284 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6285 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6286 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6287 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6288 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6289 isa<UsingDirectiveDecl>(D) ||
6290 isa<OMPDeclareReductionDecl>(D) ||
6291 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6292 return true;
6293 const auto *VD = dyn_cast<VarDecl>(D);
6294 if (!VD)
6295 return false;
6296 return VD->hasGlobalStorage() || !VD->isUsed();
6297 }))
6298 continue;
6299 }
6300 // Found multiple children - there is no single child to return.
6301 if (Child)
6302 return nullptr;
6303 Child = S;
6304 }
6305 if (Child)
6306 Child = Child->IgnoreContainers();
6307 }
6308 return Child;
6309}
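//
// For illustration: for a captured body such as
//
//   { ; int unused; #pragma omp teams ... }
//
// the scan above ignores the null statement and the unused local declaration
// and returns the teams directive as the single meaningful child; two
// non-ignorable statements make it return nullptr.
//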
6310
6311const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6312 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6313 int32_t &MaxTeamsVal) {
6314
6315 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6316 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6317 "Expected target-based executable directive.");
6318 switch (DirectiveKind) {
6319 case OMPD_target: {
6320 const auto *CS = D.getInnermostCapturedStmt();
6321 const auto *Body =
6322 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6323 const Stmt *ChildStmt =
6324 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6325 if (const auto *NestedDir =
6326 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6327 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6328 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6329 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6330 ->getNumTeams()
6331 .front();
6332 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6333 if (auto Constant =
6334 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6335 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6336 return NumTeams;
6337 }
6338 MinTeamsVal = MaxTeamsVal = 0;
6339 return nullptr;
6340 }
6341 MinTeamsVal = MaxTeamsVal = 1;
6342 return nullptr;
6343 }
6344 // A value of -1 is used to check whether no teams region needs to be emitted.
6345 MinTeamsVal = MaxTeamsVal = -1;
6346 return nullptr;
6347 }
6348 case OMPD_target_teams_loop:
6349 case OMPD_target_teams:
6350 case OMPD_target_teams_distribute:
6351 case OMPD_target_teams_distribute_simd:
6352 case OMPD_target_teams_distribute_parallel_for:
6353 case OMPD_target_teams_distribute_parallel_for_simd: {
6354 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6355 const Expr *NumTeams =
6356 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6357 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6358 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6359 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6360 return NumTeams;
6361 }
6362 MinTeamsVal = MaxTeamsVal = 0;
6363 return nullptr;
6364 }
6365 case OMPD_target_parallel:
6366 case OMPD_target_parallel_for:
6367 case OMPD_target_parallel_for_simd:
6368 case OMPD_target_parallel_loop:
6369 case OMPD_target_simd:
6370 MinTeamsVal = MaxTeamsVal = 1;
6371 return nullptr;
6372 case OMPD_parallel:
6373 case OMPD_for:
6374 case OMPD_parallel_for:
6375 case OMPD_parallel_loop:
6376 case OMPD_parallel_master:
6377 case OMPD_parallel_sections:
6378 case OMPD_for_simd:
6379 case OMPD_parallel_for_simd:
6380 case OMPD_cancel:
6381 case OMPD_cancellation_point:
6382 case OMPD_ordered:
6383 case OMPD_threadprivate:
6384 case OMPD_allocate:
6385 case OMPD_task:
6386 case OMPD_simd:
6387 case OMPD_tile:
6388 case OMPD_unroll:
6389 case OMPD_sections:
6390 case OMPD_section:
6391 case OMPD_single:
6392 case OMPD_master:
6393 case OMPD_critical:
6394 case OMPD_taskyield:
6395 case OMPD_barrier:
6396 case OMPD_taskwait:
6397 case OMPD_taskgroup:
6398 case OMPD_atomic:
6399 case OMPD_flush:
6400 case OMPD_depobj:
6401 case OMPD_scan:
6402 case OMPD_teams:
6403 case OMPD_target_data:
6404 case OMPD_target_exit_data:
6405 case OMPD_target_enter_data:
6406 case OMPD_distribute:
6407 case OMPD_distribute_simd:
6408 case OMPD_distribute_parallel_for:
6409 case OMPD_distribute_parallel_for_simd:
6410 case OMPD_teams_distribute:
6411 case OMPD_teams_distribute_simd:
6412 case OMPD_teams_distribute_parallel_for:
6413 case OMPD_teams_distribute_parallel_for_simd:
6414 case OMPD_target_update:
6415 case OMPD_declare_simd:
6416 case OMPD_declare_variant:
6417 case OMPD_begin_declare_variant:
6418 case OMPD_end_declare_variant:
6419 case OMPD_declare_target:
6420 case OMPD_end_declare_target:
6421 case OMPD_declare_reduction:
6422 case OMPD_declare_mapper:
6423 case OMPD_taskloop:
6424 case OMPD_taskloop_simd:
6425 case OMPD_master_taskloop:
6426 case OMPD_master_taskloop_simd:
6427 case OMPD_parallel_master_taskloop:
6428 case OMPD_parallel_master_taskloop_simd:
6429 case OMPD_requires:
6430 case OMPD_metadirective:
6431 case OMPD_unknown:
6432 break;
6433 default:
6434 break;
6435 }
6436 llvm_unreachable("Unexpected directive kind.");
6437}
6438
6439llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6440 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6441 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6442 "Clauses associated with the teams directive expected to be emitted "
6443 "only for the host!");
6444 CGBuilderTy &Bld = CGF.Builder;
6445 int32_t MinNT = -1, MaxNT = -1;
6446 const Expr *NumTeams =
6447 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6448 if (NumTeams != nullptr) {
6449 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6450
6451 switch (DirectiveKind) {
6452 case OMPD_target: {
6453 const auto *CS = D.getInnermostCapturedStmt();
6454 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6455 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6456 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6457 /*IgnoreResultAssign*/ true);
6458 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6459 /*isSigned=*/true);
6460 }
6461 case OMPD_target_teams:
6462 case OMPD_target_teams_distribute:
6463 case OMPD_target_teams_distribute_simd:
6464 case OMPD_target_teams_distribute_parallel_for:
6465 case OMPD_target_teams_distribute_parallel_for_simd: {
6466 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6467 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6468 /*IgnoreResultAssign*/ true);
6469 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6470 /*isSigned=*/true);
6471 }
6472 default:
6473 break;
6474 }
6475 }
6476
6477 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6478 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6479}
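//
// For illustration: '#pragma omp target teams num_teams(4)' yields the
// constant i32 4 here; a non-constant clause argument is emitted and cast to
// i32, and without any applicable clause the constant fallback computed by
// getNumTeamsExprForTargetDirective (-1, 0 or 1) is returned.
//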
6480
6481/// Check for a num threads constant value (stored in \p UpperBound), or
6482/// expression (stored in \p E). If the value is conditional (via an if-clause),
6483/// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
6484/// corresponding expression evaluation is not performed.
6485static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6486 const Expr **E, int32_t &UpperBound,
6487 bool UpperBoundOnly, llvm::Value **CondVal) {
6488 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6489 CGF.getContext(), CS->getCapturedStmt());
6490 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6491 if (!Dir)
6492 return;
6493
6494 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6495 // Handle the if clause. If an if clause is present, the number of threads
6496 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6497 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6498 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6499 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6500 const OMPIfClause *IfClause = nullptr;
6501 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6502 if (C->getNameModifier() == OMPD_unknown ||
6503 C->getNameModifier() == OMPD_parallel) {
6504 IfClause = C;
6505 break;
6506 }
6507 }
6508 if (IfClause) {
6509 const Expr *CondExpr = IfClause->getCondition();
6510 bool Result;
6511 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6512 if (!Result) {
6513 UpperBound = 1;
6514 return;
6515 }
6516 } else {
6517 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6518 if (const auto *PreInit =
6519 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6520 for (const auto *I : PreInit->decls()) {
6521 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6522 CGF.EmitVarDecl(cast<VarDecl>(*I));
6523 } else {
6524 CodeGenFunction::AutoVarEmission Emission =
6525 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6526 CGF.EmitAutoVarCleanups(Emission);
6527 }
6528 }
6529 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6530 }
6531 }
6532 }
6533 }
6534 // Check the value of the num_threads clause only if the if clause was not
6535 // specified or does not evaluate to false.
6536 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6537 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6538 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6539 const auto *NumThreadsClause =
6540 Dir->getSingleClause<OMPNumThreadsClause>();
6541 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6542 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6543 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6544 UpperBound =
6545 UpperBound
6546 ? Constant->getZExtValue()
6547 : std::min(UpperBound,
6548 static_cast<int32_t>(Constant->getZExtValue()));
6549 // If we haven't found an upper bound, remember we saw a thread limiting
6550 // clause.
6551 if (UpperBound == -1)
6552 UpperBound = 0;
6553 if (!E)
6554 return;
6555 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6556 if (const auto *PreInit =
6557 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6558 for (const auto *I : PreInit->decls()) {
6559 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6560 CGF.EmitVarDecl(cast<VarDecl>(*I));
6561 } else {
6562 CodeGenFunction::AutoVarEmission Emission =
6563 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6564 CGF.EmitAutoVarCleanups(Emission);
6565 }
6566 }
6567 }
6568 *E = NTExpr;
6569 }
6570 return;
6571 }
6572 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6573 UpperBound = 1;
6574}
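//
// For illustration: for a nested '#pragma omp parallel if(c) num_threads(n)'
// the effective thread count follows the formula in the comment above,
// roughly
//
//   nthreads = c ? (n ? n : 0) : 1;   // 0 means runtime default
//
// with UpperBound tracking the best known compile-time bound.
//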
6575
6576const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6577 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6578 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6579 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6580 "Clauses associated with the teams directive expected to be emitted "
6581 "only for the host!");
6582 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6583 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6584 "Expected target-based executable directive.");
6585
6586 const Expr *NT = nullptr;
6587 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6588
6589 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6590 if (E->isIntegerConstantExpr(CGF.getContext())) {
6591 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6592 UpperBound = UpperBound ? Constant->getZExtValue()
6593 : std::min(UpperBound,
6594 int32_t(Constant->getZExtValue()));
6595 }
6596 // If we haven't found an upper bound, remember we saw a thread limiting
6597 // clause.
6598 if (UpperBound == -1)
6599 UpperBound = 0;
6600 if (EPtr)
6601 *EPtr = E;
6602 };
6603
6604 auto ReturnSequential = [&]() {
6605 UpperBound = 1;
6606 return NT;
6607 };
6608
6609 switch (DirectiveKind) {
6610 case OMPD_target: {
6611 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6612 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6613 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6614 CGF.getContext(), CS->getCapturedStmt());
6615 // TODO: The standard is not clear how to resolve two thread limit clauses,
6616 // let's pick the teams one if it's present, otherwise the target one.
6617 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6618 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6619 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6620 ThreadLimitClause = TLC;
6621 if (ThreadLimitExpr) {
6622 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6623 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6624 CodeGenFunction::LexicalScope Scope(
6625 CGF,
6626 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6627 if (const auto *PreInit =
6628 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6629 for (const auto *I : PreInit->decls()) {
6630 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6631 CGF.EmitVarDecl(cast<VarDecl>(*I));
6632 } else {
6633 CodeGenFunction::AutoVarEmission Emission =
6634 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6635 CGF.EmitAutoVarCleanups(Emission);
6636 }
6637 }
6638 }
6639 }
6640 }
6641 }
6642 if (ThreadLimitClause)
6643 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6644 ThreadLimitExpr);
6645 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6646 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6647 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6648 CS = Dir->getInnermostCapturedStmt();
6649 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6650 CGF.getContext(), CS->getCapturedStmt());
6651 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6652 }
6653 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6654 CS = Dir->getInnermostCapturedStmt();
6655 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6656 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6657 return ReturnSequential();
6658 }
6659 return NT;
6660 }
6661 case OMPD_target_teams: {
6662 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6663 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6664 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6665 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6666 ThreadLimitExpr);
6667 }
6668 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6669 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6670 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6671 CGF.getContext(), CS->getCapturedStmt());
6672 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6673 if (Dir->getDirectiveKind() == OMPD_distribute) {
6674 CS = Dir->getInnermostCapturedStmt();
6675 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6676 }
6677 }
6678 return NT;
6679 }
6680 case OMPD_target_teams_distribute:
6681 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6682 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6683 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6684 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6685 ThreadLimitExpr);
6686 }
6687 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6688 UpperBoundOnly, CondVal);
6689 return NT;
6690 case OMPD_target_teams_loop:
6691 case OMPD_target_parallel_loop:
6692 case OMPD_target_parallel:
6693 case OMPD_target_parallel_for:
6694 case OMPD_target_parallel_for_simd:
6695 case OMPD_target_teams_distribute_parallel_for:
6696 case OMPD_target_teams_distribute_parallel_for_simd: {
6697 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6698 const OMPIfClause *IfClause = nullptr;
6699 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6700 if (C->getNameModifier() == OMPD_unknown ||
6701 C->getNameModifier() == OMPD_parallel) {
6702 IfClause = C;
6703 break;
6704 }
6705 }
6706 if (IfClause) {
6707 const Expr *Cond = IfClause->getCondition();
6708 bool Result;
6709 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6710 if (!Result)
6711 return ReturnSequential();
6712 } else {
6713 CodeGenFunction::RunCleanupsScope Scope(CGF);
6714 *CondVal = CGF.EvaluateExprAsBool(Cond);
6715 }
6716 }
6717 }
6718 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6719 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6720 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6721 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6722 ThreadLimitExpr);
6723 }
6724 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6725 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6726 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6727 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6728 return NumThreadsClause->getNumThreads();
6729 }
6730 return NT;
6731 }
6732 case OMPD_target_teams_distribute_simd:
6733 case OMPD_target_simd:
6734 return ReturnSequential();
6735 default:
6736 break;
6737 }
6738 llvm_unreachable("Unsupported directive kind.");
6739}
6740
6741llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6742 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6743 llvm::Value *NumThreadsVal = nullptr;
6744 llvm::Value *CondVal = nullptr;
6745 llvm::Value *ThreadLimitVal = nullptr;
6746 const Expr *ThreadLimitExpr = nullptr;
6747 int32_t UpperBound = -1;
6748
6749 const Expr *NT = getNumThreadsExprForTargetDirective(
6750 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6751 &ThreadLimitExpr);
6752
6753 // Thread limit expressions are used below, emit them.
6754 if (ThreadLimitExpr) {
6755 ThreadLimitVal =
6756 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6757 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6758 /*isSigned=*/false);
6759 }
6760
6761 // Generate the num threads expression.
6762 if (UpperBound == 1) {
6763 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6764 } else if (NT) {
6765 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6766 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6767 /*isSigned=*/false);
6768 } else if (ThreadLimitVal) {
6769 // If we do not have a num threads value but a thread limit, replace the
6770 // former with the latter. We already handled the thread limit expression.
6771 NumThreadsVal = ThreadLimitVal;
6772 ThreadLimitVal = nullptr;
6773 } else {
6774 // Default to "0" which means runtime choice.
6775 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6776 NumThreadsVal = CGF.Builder.getInt32(0);
6777 }
6778
6779 // Handle the if clause. If an if clause is present, the number of threads
6780 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6781 if (CondVal) {
6782 CodeGenFunction::RunCleanupsScope Scope(CGF);
6783 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6784 CGF.Builder.getInt32(1));
6785 }
6786
6787 // If both the thread limit and the num threads expression were present,
6788 // take the minimum.
6789 if (ThreadLimitVal) {
6790 NumThreadsVal = CGF.Builder.CreateSelect(
6791 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6792 ThreadLimitVal, NumThreadsVal);
6793 }
6794
6795 return NumThreadsVal;
6796}
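//
// For illustration: for
// '#pragma omp target parallel thread_limit(tl) num_threads(nt)' the value
// built above is roughly
//
//   threads = min(tl, cond ? nt : 1)   // unsigned compare + select
//
// and a bare 'target' without any hint yields the constant 0 (runtime
// choice).
//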
6797
6798namespace {
6799LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6800
6801// Utility to handle information from clauses associated with a given
6802// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6803// It provides a convenient interface to obtain the information and generate
6804// code for that information.
6805class MappableExprsHandler {
6806public:
6807 /// Custom comparator for attach-pointer expressions that compares them by
6808 /// complexity (i.e. their component-depth) first, then by the order in which
6809 /// they were computed by collectAttachPtrExprInfo(), if they are semantically
6810 /// different.
6811 struct AttachPtrExprComparator {
6812 const MappableExprsHandler &Handler;
6813 // Cache of previous equality comparison results.
6814 mutable llvm::DenseMap<std::pair<const Expr *, const Expr *>, bool>
6815 CachedEqualityComparisons;
6816
6817 AttachPtrExprComparator(const MappableExprsHandler &H) : Handler(H) {}
6818 AttachPtrExprComparator() = delete;
6819
6820 // Return true iff LHS is "less than" RHS.
6821 bool operator()(const Expr *LHS, const Expr *RHS) const {
6822 if (LHS == RHS)
6823 return false;
6824
6825 // First, compare by complexity (depth)
6826 const auto ItLHS = Handler.AttachPtrComponentDepthMap.find(LHS);
6827 const auto ItRHS = Handler.AttachPtrComponentDepthMap.find(RHS);
6828
6829 std::optional<size_t> DepthLHS =
6830 (ItLHS != Handler.AttachPtrComponentDepthMap.end()) ? ItLHS->second
6831 : std::nullopt;
6832 std::optional<size_t> DepthRHS =
6833 (ItRHS != Handler.AttachPtrComponentDepthMap.end()) ? ItRHS->second
6834 : std::nullopt;
6835
6836 // std::nullopt (no attach pointer) has lowest complexity
6837 if (!DepthLHS.has_value() && !DepthRHS.has_value()) {
6838 // Both have same complexity, now check semantic equality
6839 if (areEqual(LHS, RHS))
6840 return false;
6841 // Different semantically, compare by computation order
6842 return wasComputedBefore(LHS, RHS);
6843 }
6844 if (!DepthLHS.has_value())
6845 return true; // LHS has lower complexity
6846 if (!DepthRHS.has_value())
6847 return false; // RHS has lower complexity
6848
6849 // Both have values, compare by depth (lower depth = lower complexity)
6850 if (DepthLHS.value() != DepthRHS.value())
6851 return DepthLHS.value() < DepthRHS.value();
6852
6853 // Same complexity, now check semantic equality
6854 if (areEqual(LHS, RHS))
6855 return false;
6856 // Different semantically, compare by computation order
6857 return wasComputedBefore(LHS, RHS);
6858 }
6859
6860 public:
6861 /// Return true if \p LHS and \p RHS are semantically equal. Uses pre-cached
6862 /// results, if available, otherwise does a recursive semantic comparison.
6863 bool areEqual(const Expr *LHS, const Expr *RHS) const {
6864 // Check cache first for faster lookup
6865 const auto CachedResultIt = CachedEqualityComparisons.find({LHS, RHS});
6866 if (CachedResultIt != CachedEqualityComparisons.end())
6867 return CachedResultIt->second;
6868
6869 bool ComparisonResult = areSemanticallyEqual(LHS, RHS);
6870
6871 // Cache the result for future lookups (both orders since semantic
6872 // equality is commutative)
6873 CachedEqualityComparisons[{LHS, RHS}] = ComparisonResult;
6874 CachedEqualityComparisons[{RHS, LHS}] = ComparisonResult;
6875 return ComparisonResult;
6876 }
6877
6878 /// Compare the two attach-ptr expressions by their computation order.
6879 /// Returns true iff LHS was computed before RHS by
6880 /// collectAttachPtrExprInfo().
6881 bool wasComputedBefore(const Expr *LHS, const Expr *RHS) const {
6882 const size_t &OrderLHS = Handler.AttachPtrComputationOrderMap.at(LHS);
6883 const size_t &OrderRHS = Handler.AttachPtrComputationOrderMap.at(RHS);
6884
6885 return OrderLHS < OrderRHS;
6886 }
6887
6888 private:
6889 /// Helper function to compare attach-pointer expressions semantically.
6890 /// This function handles various expression types that can be part of an
6891 /// attach-pointer.
6892 /// TODO: Not urgent, but we should ideally return true when comparing
6893 /// `p[10]`, `*(p + 10)`, `*(p + 5 + 5)`, `p[10:1]` etc.
6894 bool areSemanticallyEqual(const Expr *LHS, const Expr *RHS) const {
6895 if (LHS == RHS)
6896 return true;
6897
6898 // If only one is null, they aren't equal
6899 if (!LHS || !RHS)
6900 return false;
6901
6902 ASTContext &Ctx = Handler.CGF.getContext();
6903 // Strip away parentheses and no-op casts to get to the core expression
6904 LHS = LHS->IgnoreParenNoopCasts(Ctx);
6905 RHS = RHS->IgnoreParenNoopCasts(Ctx);
6906
6907 // Direct pointer comparison of the underlying expressions
6908 if (LHS == RHS)
6909 return true;
6910
6911 // Check if the expression classes match
6912 if (LHS->getStmtClass() != RHS->getStmtClass())
6913 return false;
6914
6915 // Handle DeclRefExpr (variable references)
6916 if (const auto *LD = dyn_cast<DeclRefExpr>(LHS)) {
6917 const auto *RD = dyn_cast<DeclRefExpr>(RHS);
6918 if (!RD)
6919 return false;
6920 return LD->getDecl()->getCanonicalDecl() ==
6921 RD->getDecl()->getCanonicalDecl();
6922 }
6923
6924 // Handle ArraySubscriptExpr (array indexing like a[i])
6925 if (const auto *LA = dyn_cast<ArraySubscriptExpr>(LHS)) {
6926 const auto *RA = dyn_cast<ArraySubscriptExpr>(RHS);
6927 if (!RA)
6928 return false;
6929 return areSemanticallyEqual(LA->getBase(), RA->getBase()) &&
6930 areSemanticallyEqual(LA->getIdx(), RA->getIdx());
6931 }
6932
6933 // Handle MemberExpr (member access like s.m or p->m)
6934 if (const auto *LM = dyn_cast<MemberExpr>(LHS)) {
6935 const auto *RM = dyn_cast<MemberExpr>(RHS);
6936 if (!RM)
6937 return false;
6938 if (LM->getMemberDecl()->getCanonicalDecl() !=
6939 RM->getMemberDecl()->getCanonicalDecl())
6940 return false;
6941 return areSemanticallyEqual(LM->getBase(), RM->getBase());
6942 }
6943
6944 // Handle UnaryOperator (unary operations like *p, &x, etc.)
6945 if (const auto *LU = dyn_cast<UnaryOperator>(LHS)) {
6946 const auto *RU = dyn_cast<UnaryOperator>(RHS);
6947 if (!RU)
6948 return false;
6949 if (LU->getOpcode() != RU->getOpcode())
6950 return false;
6951 return areSemanticallyEqual(LU->getSubExpr(), RU->getSubExpr());
6952 }
6953
6954 // Handle BinaryOperator (binary operations like p + offset)
6955 if (const auto *LB = dyn_cast<BinaryOperator>(LHS)) {
6956 const auto *RB = dyn_cast<BinaryOperator>(RHS);
6957 if (!RB)
6958 return false;
6959 if (LB->getOpcode() != RB->getOpcode())
6960 return false;
6961 return areSemanticallyEqual(LB->getLHS(), RB->getLHS()) &&
6962 areSemanticallyEqual(LB->getRHS(), RB->getRHS());
6963 }
6964
6965 // Handle ArraySectionExpr (array sections like a[0:1])
6966 // Attach pointers should not contain array-sections, but currently we
6967 // don't emit an error.
6968 if (const auto *LAS = dyn_cast<ArraySectionExpr>(LHS)) {
6969 const auto *RAS = dyn_cast<ArraySectionExpr>(RHS);
6970 if (!RAS)
6971 return false;
6972 return areSemanticallyEqual(LAS->getBase(), RAS->getBase()) &&
6973 areSemanticallyEqual(LAS->getLowerBound(),
6974 RAS->getLowerBound()) &&
6975 areSemanticallyEqual(LAS->getLength(), RAS->getLength());
6976 }
6977
6978 // Handle CastExpr (explicit casts)
6979 if (const auto *LC = dyn_cast<CastExpr>(LHS)) {
6980 const auto *RC = dyn_cast<CastExpr>(RHS);
6981 if (!RC)
6982 return false;
6983 if (LC->getCastKind() != RC->getCastKind())
6984 return false;
6985 return areSemanticallyEqual(LC->getSubExpr(), RC->getSubExpr());
6986 }
6987
6988 // Handle CXXThisExpr (this pointer)
6989 if (isa<CXXThisExpr>(LHS) && isa<CXXThisExpr>(RHS))
6990 return true;
6991
6992 // Handle IntegerLiteral (integer constants)
6993 if (const auto *LI = dyn_cast<IntegerLiteral>(LHS)) {
6994 const auto *RI = dyn_cast<IntegerLiteral>(RHS);
6995 if (!RI)
6996 return false;
6997 return LI->getValue() == RI->getValue();
6998 }
6999
7000 // Handle CharacterLiteral (character constants)
7001 if (const auto *LC = dyn_cast<CharacterLiteral>(LHS)) {
7002 const auto *RC = dyn_cast<CharacterLiteral>(RHS);
7003 if (!RC)
7004 return false;
7005 return LC->getValue() == RC->getValue();
7006 }
7007
7008 // Handle FloatingLiteral (floating point constants)
7009 if (const auto *LF = dyn_cast<FloatingLiteral>(LHS)) {
7010 const auto *RF = dyn_cast<FloatingLiteral>(RHS);
7011 if (!RF)
7012 return false;
7013 // Use bitwise comparison for floating point literals
7014 return LF->getValue().bitwiseIsEqual(RF->getValue());
7015 }
7016
7017 // Handle StringLiteral (string constants)
7018 if (const auto *LS = dyn_cast<StringLiteral>(LHS)) {
7019 const auto *RS = dyn_cast<StringLiteral>(RHS);
7020 if (!RS)
7021 return false;
7022 return LS->getString() == RS->getString();
7023 }
7024
7025 // Handle CXXNullPtrLiteralExpr (nullptr)
7026 if (isa<CXXNullPtrLiteralExpr>(LHS) && isa<CXXNullPtrLiteralExpr>(RHS))
7027 return true;
7028
7029 // Handle CXXBoolLiteralExpr (true/false)
7030 if (const auto *LB = dyn_cast<CXXBoolLiteralExpr>(LHS)) {
7031 const auto *RB = dyn_cast<CXXBoolLiteralExpr>(RHS);
7032 if (!RB)
7033 return false;
7034 return LB->getValue() == RB->getValue();
7035 }
7036
7037 // Fallback for other forms - use the existing comparison method
7038 return Expr::isSameComparisonOperand(LHS, RHS);
7039 }
7040 };
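//
// For illustration: under this ordering a shallow attach pointer such as 'p'
// sorts before a deeper one such as 'p->q'; two semantically equal
// expressions (e.g. two occurrences of 'p[i]') compare as equivalent, and
// remaining ties are broken by the order in which collectAttachPtrExprInfo()
// first processed each expression.
//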
7041
7042 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7043 static unsigned getFlagMemberOffset() {
7044 unsigned Offset = 0;
7045 for (uint64_t Remain =
7046 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7047 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
7048 !(Remain & 1); Remain = Remain >> 1)
7049 Offset++;
7050 return Offset;
7051 }
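//
// For illustration: OMP_MAP_MEMBER_OF occupies the high 16 bits of the 64-bit
// map-type flags, so this returns the position of its lowest set bit (48); a
// parent entry at position n is then encoded roughly as
// ((n + 1) << getFlagMemberOffset()).
//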
7052
7053 /// Class that holds debugging information for a data mapping to be passed to
7054 /// the runtime library.
7055 class MappingExprInfo {
7056 /// The variable declaration used for the data mapping.
7057 const ValueDecl *MapDecl = nullptr;
7058 /// The original expression used in the map clause, or null if there is
7059 /// none.
7060 const Expr *MapExpr = nullptr;
7061
7062 public:
7063 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7064 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7065
7066 const ValueDecl *getMapDecl() const { return MapDecl; }
7067 const Expr *getMapExpr() const { return MapExpr; }
7068 };
7069
7070 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
7071 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7072 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
7073 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
7074 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
7075 using MapNonContiguousArrayTy =
7076 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
7077 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7078 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
7079 using MapData =
7080 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7081 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
7082 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
7083 using MapDataArrayTy = SmallVector<MapData, 4>;
7084
7085 /// This structure contains combined information generated for mappable
7086 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7087 /// mappers, and non-contiguous information.
7088 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
7089 MapExprsArrayTy Exprs;
7090 MapValueDeclsArrayTy Mappers;
7091 MapValueDeclsArrayTy DevicePtrDecls;
7092
7093 /// Append arrays in \a CurInfo.
7094 void append(MapCombinedInfoTy &CurInfo) {
7095 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7096 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
7097 CurInfo.DevicePtrDecls.end());
7098 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7099 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
7100 }
7101 };
7102
7103 /// Map between a struct and its lowest & highest elements which have been
7104 /// mapped.
7105 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7106 /// HE(FieldIndex, Pointer)}
7107 struct StructRangeInfoTy {
7108 MapCombinedInfoTy PreliminaryMapData;
7109 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7110 0, Address::invalid()};
7111 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7112 0, Address::invalid()};
7113 Address Base = Address::invalid();
7114 Address LB = Address::invalid();
7115 bool IsArraySection = false;
7116 bool HasCompleteRecord = false;
7117 };
7118
7119 /// A struct to store the attach pointer and pointee information, to be used
7120 /// when emitting an attach entry.
7121 struct AttachInfoTy {
7122 Address AttachPtrAddr = Address::invalid();
7123 Address AttachPteeAddr = Address::invalid();
7124 const ValueDecl *AttachPtrDecl = nullptr;
7125 const Expr *AttachMapExpr = nullptr;
7126
7127 bool isValid() const {
7128 return AttachPtrAddr.isValid() && AttachPteeAddr.isValid();
7129 }
7130 };
7131
7132 /// Check if there's any component list where the attach pointer expression
7133 /// matches the given captured variable.
7134 bool hasAttachEntryForCapturedVar(const ValueDecl *VD) const {
7135 for (const auto &AttachEntry : AttachPtrExprMap) {
7136 if (AttachEntry.second) {
7137 // Check if the attach pointer expression is a DeclRefExpr that
7138 // references the captured variable
7139 if (const auto *DRE = dyn_cast<DeclRefExpr>(AttachEntry.second))
7140 if (DRE->getDecl() == VD)
7141 return true;
7142 }
7143 }
7144 return false;
7145 }
7146
7147 /// Get the previously-cached attach pointer for a component list, if any.
7148 const Expr *getAttachPtrExpr(
7149 OMPClauseMappableExprCommon::MappableExprComponentListRef Components)
7150 const {
7151 const auto It = AttachPtrExprMap.find(Components);
7152 if (It != AttachPtrExprMap.end())
7153 return It->second;
7154
7155 return nullptr;
7156 }
7157
7157
7158private:
7159 /// Kind that defines how a device pointer has to be returned.
7160 struct MapInfo {
7161 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7162 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7163 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7164 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7165 bool ReturnDevicePointer = false;
7166 bool IsImplicit = false;
7167 const ValueDecl *Mapper = nullptr;
7168 const Expr *VarRef = nullptr;
7169 bool ForDeviceAddr = false;
7170
7171 MapInfo() = default;
7172 MapInfo(
7173 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7174 OpenMPMapClauseKind MapType,
7175 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7176 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7177 bool ReturnDevicePointer, bool IsImplicit,
7178 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7179 bool ForDeviceAddr = false)
7180 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7181 MotionModifiers(MotionModifiers),
7182 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7183 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7184 };
7185
7186 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7187 /// member and there is no map information about it, then emission of that
7188 /// entry is deferred until the whole struct has been processed.
7189 struct DeferredDevicePtrEntryTy {
7190 const Expr *IE = nullptr;
7191 const ValueDecl *VD = nullptr;
7192 bool ForDeviceAddr = false;
7193
7194 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7195 bool ForDeviceAddr)
7196 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7197 };
7198
7199 /// The target directive from where the mappable clauses were extracted. It
7200 /// is either an executable directive or a user-defined mapper directive.
7201 llvm::PointerUnion<const OMPExecutableDirective *,
7202 const OMPDeclareMapperDecl *>
7203 CurDir;
7204
7205 /// Function the directive is being generated for.
7206 CodeGenFunction &CGF;
7207
7208 /// Set of all first private variables in the current directive.
7209 /// bool data is set to true if the variable is implicitly marked as
7210 /// firstprivate, false otherwise.
7211 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7212
7213 /// Map between device pointer declarations and their expression components.
7214 /// The key value for declarations in 'this' is null.
7215 llvm::DenseMap<
7216 const ValueDecl *,
7217 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7218 DevPointersMap;
7219
7220 /// Map between device addr declarations and their expression components.
7221 /// The key value for declarations in 'this' is null.
7222 llvm::DenseMap<
7223 const ValueDecl *,
7224 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7225 HasDevAddrsMap;
7226
7227 /// Map between lambda declarations and their map type.
7228 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7229
7230 /// Map from component lists to their attach pointer expressions.
7231 llvm::DenseMap<OMPClauseMappableExprCommon::MappableExprComponentListRef,
7232 const Expr *>
7233 AttachPtrExprMap;
7234
7235 /// Map from attach pointer expressions to their component depth.
7236 /// The nullptr key (i.e. no attach-ptr) has std::nullopt depth.
7237 /// This can be used to order attach-ptr expressions with
7238 /// increasing/decreasing depth.
7239 /// TODO: Not urgent, but we should ideally use the number of pointer
7240 /// dereferences in an expr as an indicator of its complexity, instead of the
7241 /// component-depth. That would be needed for us to treat `p[1]`, `*(p + 10)`,
7242 /// `*(p + 5 + 5)` together.
7243 llvm::DenseMap<const Expr *, std::optional<size_t>>
7244 AttachPtrComponentDepthMap = {{nullptr, std::nullopt}};
7245
7246 /// Map from attach pointer expressions to the order in which they were
7247 /// computed by collectAttachPtrExprInfo().
7248 llvm::DenseMap<const Expr *, size_t> AttachPtrComputationOrderMap = {
7249 {nullptr, 0}};
7250
7251 /// An instance of attach-ptr-expr comparator that can be used throughout the
7252 /// lifetime of this handler.
7253 AttachPtrExprComparator AttachPtrComparator;
7254
7255 llvm::Value *getExprTypeSize(const Expr *E) const {
7256 QualType ExprTy = E->getType().getCanonicalType();
7257
7258 // Calculate the size for array shaping expression.
7259 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7260 llvm::Value *Size =
7261 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7262 for (const Expr *SE : OAE->getDimensions()) {
7263 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7264 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7265 CGF.getContext().getSizeType(),
7266 SE->getExprLoc());
7267 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7268 }
7269 return Size;
7270 }
7271
7272 // Reference types are ignored for mapping purposes.
7273 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7274 ExprTy = RefTy->getPointeeType().getCanonicalType();
7275
7276 // Given that an array section is considered a built-in type, we need to
7277 // do the calculation based on the length of the section instead of relying
7278 // on CGF.getTypeSize(E->getType()).
7279 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
7280 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
7281 OAE->getBase()->IgnoreParenImpCasts())
7282 .getCanonicalType();
7283
7284 // If there is no length associated with the expression and the lower
7285 // bound is not specified either, that means we are using the whole length
7286 // of the base.
7287 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7288 !OAE->getLowerBound())
7289 return CGF.getTypeSize(BaseTy);
7290
7291 llvm::Value *ElemSize;
7292 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7293 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7294 } else {
7295 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7296 assert(ATy && "Expecting array type if not a pointer type.");
7297 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7298 }
7299
7300 // If we don't have a length at this point, that is because we have an
7301 // array section with a single element.
7302 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7303 return ElemSize;
7304
7305 if (const Expr *LenExpr = OAE->getLength()) {
7306 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7307 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7308 CGF.getContext().getSizeType(),
7309 LenExpr->getExprLoc());
7310 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7311 }
7312 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7313 OAE->getLowerBound() && "expected array_section[lb:].");
7314 // Size = sizetype - lb * elemtype;
7315 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7316 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7317 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7318 CGF.getContext().getSizeType(),
7319 OAE->getLowerBound()->getExprLoc());
7320 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7321 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7322 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7323 LengthVal = CGF.Builder.CreateSelect(
7324 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7325 return LengthVal;
7326 }
7327 return CGF.getTypeSize(ExprTy);
7328 }
7329
7330 /// Return the corresponding bits for a given map clause modifier. Add
7331 /// a flag marking the map as a pointer if requested. Add a flag marking the
7332 /// map as the first one of a series of maps that relate to the same map
7333 /// expression.
7334 OpenMPOffloadMappingFlags getMapTypeBits(
7335 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7336 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7337 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7338 OpenMPOffloadMappingFlags Bits =
7339 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7340 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7341 switch (MapType) {
7342 case OMPC_MAP_alloc:
7343 case OMPC_MAP_release:
7344 // alloc and release are the default behavior in the runtime library, i.e.
7345 // if we don't pass any bits for alloc/release, that is what the runtime
7346 // is going to do. Therefore, we don't need to signal anything for these two
7347 // type modifiers.
7348 break;
7349 case OMPC_MAP_to:
7350 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7351 break;
7352 case OMPC_MAP_from:
7353 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7354 break;
7355 case OMPC_MAP_tofrom:
7356 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7357 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7358 break;
7359 case OMPC_MAP_delete:
7360 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7361 break;
7362 case OMPC_MAP_unknown:
7363 llvm_unreachable("Unexpected map type!");
7364 }
7365 if (AddPtrFlag)
7366 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7367 if (AddIsTargetParamFlag)
7368 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7369 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7370 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7371 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7372 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7373 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7374 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7375 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7376 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7377 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7378 if (IsNonContiguous)
7379 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7380 return Bits;
7381 }
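//
// For illustration: 'map(always, tofrom: x)' produces roughly
//
//   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS
//
// to which OMP_MAP_TARGET_PARAM is added when the entry is a kernel argument
// and OMP_MAP_PTR_AND_OBJ for the pointer-attachment entry of a dereferenced
// pointer.
//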
7382
7383 /// Return true if the provided expression is a final array section. A
7384 /// final array section is one whose length can't be proved to be one.
7385 bool isFinalArraySectionExpression(const Expr *E) const {
7386 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7387
7388 // It is not an array section and therefore not a unity-size one.
7389 if (!OASE)
7390 return false;
7391
7392 // An array section with no colon always refers to a single element.
7393 if (OASE->getColonLocFirst().isInvalid())
7394 return false;
7395
7396 const Expr *Length = OASE->getLength();
7397
7398 // If we don't have a length we have to check if the array has size 1
7399 // for this dimension. Also, we should always expect a length if the
7400 // base type is a pointer.
7401 if (!Length) {
7402 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7403 OASE->getBase()->IgnoreParenImpCasts())
7404 .getCanonicalType();
7405 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7406 return ATy->getSExtSize() != 1;
7407 // If we don't have a constant dimension length, we have to consider
7408 // the current section as having any size, so it is not necessarily
7409 // unitary. If it happens to be unity size, that's the user's fault.
7410 return true;
7411 }
7412
7413 // Check if the length evaluates to 1.
7414 Expr::EvalResult Result;
7415 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7416 return true; // Can have more than size 1.
7417
7418 llvm::APSInt ConstLength = Result.Val.getInt();
7419 return ConstLength.getSExtValue() != 1;
7420 }
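//
// For illustration: 'a[0:n]' is final because its length is not provably one,
// whereas 'a[3]' (no colon) and 'a[i:1]' are not; with no length and a
// constant array dimension, the dimension's extent decides.
//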
7421
7422 /// A helper class to copy structures with overlapped elements, i.e. those
7423 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7424 /// are not explicitly copied have mapping nodes synthesized for them,
7425 /// taking care to avoid generating zero-sized copies.
7426 class CopyOverlappedEntryGaps {
7427 CodeGenFunction &CGF;
7428 MapCombinedInfoTy &CombinedInfo;
7429 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7430 const ValueDecl *MapDecl = nullptr;
7431 const Expr *MapExpr = nullptr;
7432 Address BP = Address::invalid();
7433 bool IsNonContiguous = false;
7434 uint64_t DimSize = 0;
7435 // These elements track the position as the struct is iterated over
7436 // (in order of increasing element address).
7437 const RecordDecl *LastParent = nullptr;
7438 uint64_t Cursor = 0;
7439 unsigned LastIndex = -1u;
7440 Address LB = Address::invalid();
7441
7442 public:
7443 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7444 MapCombinedInfoTy &CombinedInfo,
7445 OpenMPOffloadMappingFlags Flags,
7446 const ValueDecl *MapDecl, const Expr *MapExpr,
7447 Address BP, Address LB, bool IsNonContiguous,
7448 uint64_t DimSize)
7449 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7450 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7451 DimSize(DimSize), LB(LB) {}
7452
7453 void processField(
7454 const OMPClauseMappableExprCommon::MappableComponent &MC,
7455 const FieldDecl *FD,
7456 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7457 EmitMemberExprBase) {
7458 const RecordDecl *RD = FD->getParent();
7459 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7460 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7461 uint64_t FieldSize =
7462 CGF.getContext().getTypeSize(FD->getType().getCanonicalType());
7463 Address ComponentLB = Address::invalid();
7464
7465 if (FD->getType()->isLValueReferenceType()) {
7466 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7467 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7468 ComponentLB =
7469 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7470 } else {
7471 ComponentLB =
7472 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
7473 }
7474
7475 if (!LastParent)
7476 LastParent = RD;
7477 if (FD->getParent() == LastParent) {
7478 if (FD->getFieldIndex() != LastIndex + 1)
7479 copyUntilField(FD, ComponentLB);
7480 } else {
7481 LastParent = FD->getParent();
7482 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7483 copyUntilField(FD, ComponentLB);
7484 }
7485 Cursor = FieldOffset + FieldSize;
7486 LastIndex = FD->getFieldIndex();
7487 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7488 }
7489
7490 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7491 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7492 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7493 llvm::Value *Size =
7494 CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
7495 copySizedChunk(LBPtr, Size);
7496 }
7497
7498 void copyUntilEnd(Address HB) {
7499 if (LastParent) {
7500 const ASTRecordLayout &RL =
7501 CGF.getContext().getASTRecordLayout(LastParent);
7502 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7503 return;
7504 }
7505 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7506 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7507 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7508 LBPtr);
7509 copySizedChunk(LBPtr, Size);
7510 }
7511
7512 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7513 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7514 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7515 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7516 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7517 CombinedInfo.Pointers.push_back(Base);
7518 CombinedInfo.Sizes.push_back(
7519 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7520 CombinedInfo.Types.push_back(Flags);
7521 CombinedInfo.Mappers.push_back(nullptr);
7522 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7523 }
7524 };
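  // Example (a sketch, assuming "struct T { int a; int b; int c; } t;" with
  // overlapped maps of "t" and "t.b"): processField(b) first copies the gap
  // [&t.a, &t.b) via copyUntilField, and copyUntilEnd then copies
  // [&t.b + 1, end of t), so t.b itself is left to its own, more specific
  // map entry.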
7525
7526 /// Generate the base pointers, section pointers, sizes, map type bits, and
7527 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7528 /// map type, map or motion modifiers, and expression components.
7529 /// \a IsFirstComponent should be set to true if the provided set of
7530 /// components is the first associated with a capture.
7531 void generateInfoForComponentList(
7532 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7533 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7534 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7535 MapCombinedInfoTy &CombinedInfo,
7536 MapCombinedInfoTy &StructBaseCombinedInfo,
7537 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7538 bool IsImplicit, bool GenerateAllInfoForClauses,
7539 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7540 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7541 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7542 OverlappedElements = {},
7543 bool AreBothBasePtrAndPteeMapped = false) const {
7544 // The following summarizes what has to be generated for each map and the
7545 // types below. The generated information is expressed in this order:
7546 // base pointer, section pointer, size, flags
7547 // (to add to the ones that come from the map type and modifier).
7548 //
7549 // double d;
7550 // int i[100];
7551 // float *p;
7552 // int **a = &i;
7553 //
7554 // struct S1 {
7555 // int i;
7556 // float f[50];
7557 // }
7558 // struct S2 {
7559 // int i;
7560 // float f[50];
7561 // S1 s;
7562 // double *p;
7563 // struct S2 *ps;
7564 // int &ref;
7565 // }
7566 // S2 s;
7567 // S2 *ps;
7568 //
7569 // map(d)
7570 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7571 //
7572 // map(i)
7573 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7574 //
7575 // map(i[1:23])
7576 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7577 //
7578 // map(p)
7579 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7580 //
7581 // map(p[1:24])
7582 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7583 // in unified shared memory mode or for local pointers
7584 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7585 //
7586 // map((*a)[0:3])
7587 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7588 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7589 //
7590 // map(**a)
7591 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7592 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7593 //
7594 // map(s)
7595 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7596 //
7597 // map(s.i)
7598 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7599 //
7600 // map(s.s.f)
7601 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7602 //
7603 // map(s.p)
7604 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7605 //
7606 // map(to: s.p[:22])
7607 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7608 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7609 // &(s.p), &(s.p[0]), 22*sizeof(double),
7610 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7611 // (*) alloc space for struct members, only this is a target parameter
7612 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7613 // optimizes this entry out, same in the examples below)
7614 // (***) map the pointee (map: to)
7615 //
7616 // map(to: s.ref)
7617 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7618 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7619 // (*) alloc space for struct members, only this is a target parameter
7620 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7621 // optimizes this entry out, same in the examples below)
7622 // (***) map the pointee (map: to)
7623 //
7624 // map(s.ps)
7625 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7626 //
7627 // map(from: s.ps->s.i)
7628 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7629 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7630 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7631 //
7632 // map(to: s.ps->ps)
7633 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7634 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7635 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7636 //
7637 // map(s.ps->ps->ps)
7638 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7639 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7640 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7641 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7642 //
7643 // map(to: s.ps->ps->s.f[:22])
7644 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7645 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7646 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7647 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7648 //
7649 // map(ps)
7650 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7651 //
7652 // map(ps->i)
7653 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7654 //
7655 // map(ps->s.f)
7656 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7657 //
7658 // map(from: ps->p)
7659 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7660 //
7661 // map(to: ps->p[:22])
7662 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7663 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7664 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7665 //
7666 // map(ps->ps)
7667 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7668 //
7669 // map(from: ps->ps->s.i)
7670 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7671 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7672 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7673 //
7674 // map(from: ps->ps->ps)
7675 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7676 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7677 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7678 //
7679 // map(ps->ps->ps->ps)
7680 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7681 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7682 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7683 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7684 //
7685 // map(to: ps->ps->ps->s.f[:22])
7686 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7687 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7688 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7689 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7690 //
7691 // map(to: s.f[:22]) map(from: s.p[:33])
7692 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7693 // sizeof(double*) (*), TARGET_PARAM
7694 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7695 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7696 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7697 // (*) allocate contiguous space needed to fit all mapped members even if
7698 // that means allocating space for members not mapped (in this example,
7699 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7700 // them as well because they fall between &s.f[0] and &s.p)
7701 //
7702 // map(from: s.f[:22]) map(to: ps->p[:33])
7703 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7704 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7705 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7706 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7707 // (*) the struct this entry pertains to is the 2nd element in the list of
7708 // arguments, hence MEMBER_OF(2)
7709 //
7710 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7711 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7712 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7713 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7714 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7715 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7716 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7717 // (*) the struct this entry pertains to is the 4th element in the list
7718 // of arguments, hence MEMBER_OF(4)
7719 //
7720 // map(p, p[:100])
7721 // ===> map(p[:100])
7722 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7723
7724 // Track if the map information being generated is the first for a capture.
7725 bool IsCaptureFirstInfo = IsFirstComponentList;
7726 // When the variable is on a declare target link or in a to clause with
7727 // unified memory, a reference is needed to hold the host/device address
7728 // of the variable.
7729 bool RequiresReference = false;
7730
7731 // Scan the components from the base to the complete expression.
7732 auto CI = Components.rbegin();
7733 auto CE = Components.rend();
7734 auto I = CI;
7735
7736 // Track if the map information being generated is the first for a list of
7737 // components.
7738 bool IsExpressionFirstInfo = true;
7739 bool FirstPointerInComplexData = false;
7740 Address BP = Address::invalid();
7741 const Expr *AssocExpr = I->getAssociatedExpression();
7742 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7743 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7744 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7745
7746 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7747 return;
7748 if (isa<MemberExpr>(AssocExpr)) {
7749 // The base is the 'this' pointer. The content of the pointer is going
7750 // to be the base of the field being mapped.
7751 BP = CGF.LoadCXXThisAddress();
7752 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7753 (OASE &&
7754 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7755 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7756 } else if (OAShE &&
7757 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7758 BP = Address(
7759 CGF.EmitScalarExpr(OAShE->getBase()),
7760 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7761 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7762 } else {
7763 // The base is the reference to the variable.
7764 // BP = &Var.
7765 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7766 if (const auto *VD =
7767 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7768 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7769 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7770 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7771 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7772 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7773 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7774 RequiresReference = true;
7775 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7776 }
7777 }
7778 }
7779
7780 // If the variable is a pointer and is being dereferenced (i.e. is not
7781 // the last component), the base has to be the pointer itself, not its
7782 // reference. References are ignored for mapping purposes.
7783 QualType Ty =
7784 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7785 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7786 // No need to generate individual map information for the pointer, it
7787 // can be associated with the combined storage if shared memory mode is
7788 // active or the base declaration is not a global variable.
7789 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7790 if (!AreBothBasePtrAndPteeMapped &&
7791 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7792 !VD || VD->hasLocalStorage()))
7793 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7794 else
7795 FirstPointerInComplexData = true;
7796 ++I;
7797 }
7798 }
7799
7800 // Track whether a component of the list should be marked as MEMBER_OF some
7801 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7802 // in a component list should be marked as MEMBER_OF; all subsequent entries
7803 // do not belong to the base struct. E.g.
7804 // struct S2 s;
7805 // s.ps->ps->ps->f[:]
7806 // (1) (2) (3) (4)
7807 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7808 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7809 // is the pointee of ps(2), which is not a member of struct s, so it should not
7810 // be marked as such (it is still PTR_AND_OBJ).
7811 // The variable is initialized to false so that PTR_AND_OBJ entries which
7812 // are not struct members are not considered (e.g. array of pointers to
7813 // data).
7814 bool ShouldBeMemberOf = false;
7815
7816 // Variable keeping track of whether or not we have encountered a component
7817 // in the component list which is a member expression. Useful when we have a
7818 // pointer or a final array section, in which case it is the previous
7819 // component in the list which tells us whether we have a member expression.
7820 // E.g. X.f[:]
7821 // While processing the final array section "[:]" it is "f" which tells us
7822 // whether we are dealing with a member of a declared struct.
7823 const MemberExpr *EncounteredME = nullptr;
7824
7825 // Track the total number of dimensions. Start from one for the dummy
7826 // dimension.
7827 uint64_t DimSize = 1;
7828
7829 // Detects non-contiguous updates due to strided accesses.
7830 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
7831 // correctly when generating information to be passed to the runtime. The
7832 // flag is set to true if any array section has a constant stride not
7833 // equal to 1; sections with no stride, or whose stride is not a constant
7834 // expression, are treated as contiguous here.
7835 bool IsNonContiguous =
7836 CombinedInfo.NonContigInfo.IsNonContiguous ||
7837 any_of(Components, [&](const auto &Component) {
7838 const auto *OASE =
7839 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
7840 if (!OASE)
7841 return false;
7842
7843 const Expr *StrideExpr = OASE->getStride();
7844 if (!StrideExpr)
7845 return false;
7846
7847 const auto Constant =
7848 StrideExpr->getIntegerConstantExpr(CGF.getContext());
7849 if (!Constant)
7850 return false;
7851
7852 return !Constant->isOne();
7853 });
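    // Example (sketch): "arr[0:n:2]" has a constant stride of 2 and makes
    // the mapping non-contiguous, while "arr[0:n]" and "arr[0:n:1]" keep it
    // contiguous.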
7854
7855 bool IsPrevMemberReference = false;
7856
7857 bool IsPartialMapped =
7858 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7859
7860 // We need to check if we will be encountering any MEs. If we do not
7861 // encounter any, it means we will be mapping the whole struct.
7862 // In that case we need to skip adding an entry for the struct to the
7863 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7864 // list only when generating all info for clauses.
7865 bool IsMappingWholeStruct = true;
7866 if (!GenerateAllInfoForClauses) {
7867 IsMappingWholeStruct = false;
7868 } else {
7869 for (auto TempI = I; TempI != CE; ++TempI) {
7870 const MemberExpr *PossibleME =
7871 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7872 if (PossibleME) {
7873 IsMappingWholeStruct = false;
7874 break;
7875 }
7876 }
7877 }
7878
7879 for (; I != CE; ++I) {
7880 // If the current component is member of a struct (parent struct) mark it.
7881 if (!EncounteredME) {
7882 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7883 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7884 // as MEMBER_OF the parent struct.
7885 if (EncounteredME) {
7886 ShouldBeMemberOf = true;
7887 // Do not emit as complex pointer if this is actually not an array-like
7888 // expression.
7889 if (FirstPointerInComplexData) {
7890 QualType Ty = std::prev(I)
7891 ->getAssociatedDeclaration()
7892 ->getType()
7893 .getNonReferenceType();
7894 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7895 FirstPointerInComplexData = false;
7896 }
7897 }
7898 }
7899
7900 auto Next = std::next(I);
7901
7902 // We need to generate the addresses and sizes if this is the last
7903 // component, if the component is a pointer or if it is an array section
7904 // whose length can't be proved to be one. If this is a pointer, it
7905 // becomes the base address for the following components.
7906
7907 // A final array section is one whose length can't be proved to be one.
7908 // If the map item is non-contiguous then we don't treat any array section
7909 // as final array section.
7910 bool IsFinalArraySection =
7911 !IsNonContiguous &&
7912 isFinalArraySectionExpression(I->getAssociatedExpression());
7913
7914 // If we have a declaration for the mapping use that, otherwise use
7915 // the base declaration of the map clause.
7916 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7917 ? I->getAssociatedDeclaration()
7918 : BaseDecl;
7919 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7920 : MapExpr;
7921
7922 // Get information on whether the element is a pointer. Have to do a
7923 // special treatment for array sections given that they are built-in
7924 // types.
7925 const auto *OASE =
7926 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7927 const auto *OAShE =
7928 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7929 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7930 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7931 bool IsPointer =
7932 OAShE ||
7933 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7934 .getCanonicalType()
7935 ->isAnyPointerType()) ||
7936 I->getAssociatedExpression()->getType()->isAnyPointerType();
7937 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7938 MapDecl &&
7939 MapDecl->getType()->isLValueReferenceType();
7940 bool IsNonDerefPointer = IsPointer &&
7941 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7942 !IsNonContiguous;
7943
7944 if (OASE)
7945 ++DimSize;
7946
7947 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7948 IsFinalArraySection) {
7949 // If this is not the last component, we expect the pointer to be
7950 // associated with an array expression or member expression.
7951 assert((Next == CE ||
7952 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7953 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7954 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7955 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7956 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7957 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7958 "Unexpected expression");
7959
7960 Address LB = Address::invalid();
7961 Address LowestElem = Address::invalid();
7962 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7963 const MemberExpr *E) {
7964 const Expr *BaseExpr = E->getBase();
7965 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7966 // scalar.
7967 LValue BaseLV;
7968 if (E->isArrow()) {
7969 LValueBaseInfo BaseInfo;
7970 TBAAAccessInfo TBAAInfo;
7971 Address Addr =
7972 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7973 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7974 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7975 } else {
7976 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7977 }
7978 return BaseLV;
7979 };
7980 if (OAShE) {
7981 LowestElem = LB =
7982 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7983 CGF.ConvertTypeForMem(
7984 OAShE->getBase()->getType()->getPointeeType()),
7985 CGF.getContext().getTypeAlignInChars(
7986 OAShE->getBase()->getType()));
7987 } else if (IsMemberReference) {
7988 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7989 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7990 LowestElem = CGF.EmitLValueForFieldInitialization(
7991 BaseLVal, cast<FieldDecl>(MapDecl))
7992 .getAddress();
7993 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7994 .getAddress();
7995 } else {
7996 LowestElem = LB =
7997 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7998 .getAddress();
7999 }
8000
8001 // If this component is a pointer inside the base struct then we don't
8002 // need to create any entry for it - it will be combined with the object
8003 // it is pointing to into a single PTR_AND_OBJ entry.
8004 bool IsMemberPointerOrAddr =
8005 EncounteredME &&
8006 (((IsPointer || ForDeviceAddr) &&
8007 I->getAssociatedExpression() == EncounteredME) ||
8008 (IsPrevMemberReference && !IsPointer) ||
8009 (IsMemberReference && Next != CE &&
8010 !Next->getAssociatedExpression()->getType()->isPointerType()));
8011 if (!OverlappedElements.empty() && Next == CE) {
8012 // Handle base element with the info for overlapped elements.
8013 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8014 assert(!IsPointer &&
8015 "Unexpected base element with the pointer type.");
8016 // Mark the whole struct as the struct that requires allocation on the
8017 // device.
8018 PartialStruct.LowestElem = {0, LowestElem};
8019 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8020 I->getAssociatedExpression()->getType());
8021 Address HB = CGF.Builder.CreateConstGEP(
8022 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8023 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8024 TypeSize.getQuantity() - 1);
8025 PartialStruct.HighestElem = {
8026 std::numeric_limits<decltype(
8027 PartialStruct.HighestElem.first)>::max(),
8028 HB};
8029 PartialStruct.Base = BP;
8030 PartialStruct.LB = LB;
8031 assert(
8032 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8033 "Overlapped elements must be used only once for the variable.");
8034 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8035 // Emit data for non-overlapped data.
8036 OpenMPOffloadMappingFlags Flags =
8037 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8038 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8039 /*AddPtrFlag=*/false,
8040 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8041 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
8042 MapExpr, BP, LB, IsNonContiguous,
8043 DimSize);
8044 // Do bitcopy of all non-overlapped structure elements.
8045 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8046 Component : OverlappedElements) {
8047 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8048 Component) {
8049 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8050 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
8051 CopyGaps.processField(MC, FD, EmitMemberExprBase);
8052 }
8053 }
8054 }
8055 }
8056 CopyGaps.copyUntilEnd(HB);
8057 break;
8058 }
8059 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8060 // Skip adding an entry in the CurInfo of this combined entry if the
8061 // whole struct is currently being mapped. The struct needs to be added
8062 // in the first position before any data internal to the struct is being
8063 // mapped.
8064 // Skip adding an entry in the CurInfo of this combined entry if
8065 // PartialStruct.PreliminaryMapData.BasePointers has already been filled.
8066 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
8067 (Next == CE && MapType != OMPC_MAP_unknown)) {
8068 if (!IsMappingWholeStruct) {
8069 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8070 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
8071 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8072 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8073 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8074 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8075 Size, CGF.Int64Ty, /*isSigned=*/true));
8076 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8077 : 1);
8078 } else {
8079 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8080 StructBaseCombinedInfo.BasePointers.push_back(
8081 BP.emitRawPointer(CGF));
8082 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
8083 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8084 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
8085 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8086 Size, CGF.Int64Ty, /*isSigned=*/true));
8087 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
8088 IsNonContiguous ? DimSize : 1);
8089 }
8090
8091 // If Mapper is valid, the last component inherits the mapper.
8092 bool HasMapper = Mapper && Next == CE;
8093 if (!IsMappingWholeStruct)
8094 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8095 else
8096 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
8097 : nullptr);
8098
8099 // We need to add a pointer flag for each map that comes from the
8100 // same expression except for the first one. We also need to signal
8101 // this map is the first one that relates with the current capture
8102 // (there is a set of entries for each capture).
8103 OpenMPOffloadMappingFlags Flags =
8104 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8105 !IsExpressionFirstInfo || RequiresReference ||
8106 FirstPointerInComplexData || IsMemberReference,
8107 AreBothBasePtrAndPteeMapped ||
8108 (IsCaptureFirstInfo && !RequiresReference),
8109 IsNonContiguous);
8110
8111 if (!IsExpressionFirstInfo || IsMemberReference) {
8112 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8113 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8114 if (IsPointer || (IsMemberReference && Next != CE))
8115 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8116 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
8117 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
8118 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
8119 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
8120
8121 if (ShouldBeMemberOf) {
8122 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8123 // should be later updated with the correct value of MEMBER_OF.
8124 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
8125 // From now on, all subsequent PTR_AND_OBJ entries should not be
8126 // marked as MEMBER_OF.
8127 ShouldBeMemberOf = false;
8128 }
8129 }
8130
8131 if (!IsMappingWholeStruct)
8132 CombinedInfo.Types.push_back(Flags);
8133 else
8134 StructBaseCombinedInfo.Types.push_back(Flags);
8135 }
8136
8137 // If we have encountered a member expression so far, keep track of the
8138 // mapped member. If the parent is "*this", then the value declaration
8139 // is nullptr.
8140 if (EncounteredME) {
8141 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8142 unsigned FieldIndex = FD->getFieldIndex();
8143
8144 // Update info about the lowest and highest elements for this struct
8145 if (!PartialStruct.Base.isValid()) {
8146 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8147 if (IsFinalArraySection && OASE) {
8148 Address HB =
8149 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8150 .getAddress();
8151 PartialStruct.HighestElem = {FieldIndex, HB};
8152 } else {
8153 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8154 }
8155 PartialStruct.Base = BP;
8156 PartialStruct.LB = BP;
8157 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8158 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8159 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8160 if (IsFinalArraySection && OASE) {
8161 Address HB =
8162 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
8163 .getAddress();
8164 PartialStruct.HighestElem = {FieldIndex, HB};
8165 } else {
8166 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8167 }
8168 }
8169 }
8170
8171 // Need to emit combined struct for array sections.
8172 if (IsFinalArraySection || IsNonContiguous)
8173 PartialStruct.IsArraySection = true;
8174
8175 // If we have a final array section, we are done with this expression.
8176 if (IsFinalArraySection)
8177 break;
8178
8179 // The pointer becomes the base for the next element.
8180 if (Next != CE)
8181 BP = IsMemberReference ? LowestElem : LB;
8182 if (!IsPartialMapped)
8183 IsExpressionFirstInfo = false;
8184 IsCaptureFirstInfo = false;
8185 FirstPointerInComplexData = false;
8186 IsPrevMemberReference = IsMemberReference;
8187 } else if (FirstPointerInComplexData) {
8188 QualType Ty = Components.rbegin()
8189 ->getAssociatedDeclaration()
8190 ->getType()
8191 .getNonReferenceType();
8192 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8193 FirstPointerInComplexData = false;
8194 }
8195 }
8196 // If we ran into the whole component, allocate the space for the whole
8197 // record.
8198 if (!EncounteredME)
8199 PartialStruct.HasCompleteRecord = true;
8200
8201 if (!IsNonContiguous)
8202 return;
8203
8204 const ASTContext &Context = CGF.getContext();
8205
8206 // To support strides in array sections, we need to initialize the first
8207 // dimension size as 1, the first offset as 0, and the first count as 1.
8208 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8209 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8210 MapValuesArrayTy CurStrides;
8211 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8212 uint64_t ElementTypeSize;
8213
8214 // Collect Size information for each dimension and get the element size as
8215 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8216 // should be [10, 10] and the first stride is 4 bytes.
8217 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8218 Components) {
8219 const Expr *AssocExpr = Component.getAssociatedExpression();
8220 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8221
8222 if (!OASE)
8223 continue;
8224
8225 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
8226 auto *CAT = Context.getAsConstantArrayType(Ty);
8227 auto *VAT = Context.getAsVariableArrayType(Ty);
8228
8229 // We need all the dimension sizes except for the last dimension.
8230 assert((VAT || CAT || &Component == &*Components.begin()) &&
8231 "Should be either ConstantArray or VariableArray if not the "
8232 "first Component");
8233
8234 // Get element size if CurStrides is empty.
8235 if (CurStrides.empty()) {
8236 const Type *ElementType = nullptr;
8237 if (CAT)
8238 ElementType = CAT->getElementType().getTypePtr();
8239 else if (VAT)
8240 ElementType = VAT->getElementType().getTypePtr();
8241 else
8242 assert(&Component == &*Components.begin() &&
8243 "Only expect pointer (non CAT or VAT) when this is the "
8244 "first Component");
8245 // If ElementType is null, then it means the base is a pointer
8246 // (neither CAT nor VAT) and we'll attempt to get ElementType again
8247 // for the next iteration.
8248 if (ElementType) {
8249 // For the case of having a pointer as the base, we need to remove one
8250 // level of indirection.
8251 if (&Component != &*Components.begin())
8252 ElementType = ElementType->getPointeeOrArrayElementType();
8253 ElementTypeSize =
8254 Context.getTypeSizeInChars(ElementType).getQuantity();
8255 CurStrides.push_back(
8256 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8257 }
8258 }
8259 // Get dimension value except for the last dimension since we don't need
8260 // it.
8261 if (DimSizes.size() < Components.size() - 1) {
8262 if (CAT)
8263 DimSizes.push_back(
8264 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
8265 else if (VAT)
8266 DimSizes.push_back(CGF.Builder.CreateIntCast(
8267 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8268 /*IsSigned=*/false));
8269 }
8270 }
8271
8272 // Skip the dummy dimension since we already have its information.
8273 auto *DI = DimSizes.begin() + 1;
8274 // Product of dimensions.
8275 llvm::Value *DimProd =
8276 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8277
8278 // Collect info for non-contiguous updates. Notice that offset, count, and
8279 // stride are only meaningful for array sections, so anything other than an
8280 // array section or subscript is skipped.
8281 // Also, the sizes of the offset, count, and stride lists are not the same
8282 // as those of pointers, base_pointers, sizes, or dims. Instead, they match
8283 // the number of non-contiguous declarations in the target update to/from
8284 // clause.
8285 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8286 Components) {
8287 const Expr *AssocExpr = Component.getAssociatedExpression();
8288
8289 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8290 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8291 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8292 /*isSigned=*/false);
8293 CurOffsets.push_back(Offset);
8294 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8295 CurStrides.push_back(CurStrides.back());
8296 continue;
8297 }
8298
8299 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
8300
8301 if (!OASE)
8302 continue;
8303
8304 // Offset
8305 const Expr *OffsetExpr = OASE->getLowerBound();
8306 llvm::Value *Offset = nullptr;
8307 if (!OffsetExpr) {
8308 // If offset is absent, then we just set it to zero.
8309 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8310 } else {
8311 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8312 CGF.Int64Ty,
8313 /*isSigned=*/false);
8314 }
8315 CurOffsets.push_back(Offset);
8316
8317 // Count
8318 const Expr *CountExpr = OASE->getLength();
8319 llvm::Value *Count = nullptr;
8320 if (!CountExpr) {
8321 // In Clang, once a higher dimension is an array section, we construct all
8322 // the lower dimensions as array sections; however, for a case like
8323 // arr[0:2][2], Clang constructs the inner dimension as an array section,
8324 // but it actually is not in array-section form according to the spec.
8325 if (!OASE->getColonLocFirst().isValid() &&
8326 !OASE->getColonLocSecond().isValid()) {
8327 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8328 } else {
8329 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8330 // When the length is absent it defaults to ⌈(size −
8331 // lower-bound)/stride⌉, where size is the size of the array
8332 // dimension.
8333 const Expr *StrideExpr = OASE->getStride();
8334 llvm::Value *Stride =
8335 StrideExpr
8336 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8337 CGF.Int64Ty, /*isSigned=*/false)
8338 : nullptr;
8339 if (Stride)
8340 Count = CGF.Builder.CreateUDiv(
8341 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8342 else
8343 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8344 }
8345 } else {
8346 Count = CGF.EmitScalarExpr(CountExpr);
8347 }
8348 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8349 CurCounts.push_back(Count);
8350
8351 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8352 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8353 // Offset Count Stride
8354 // D0 0 1 4 (int) <- dummy dimension
8355 // D1 0 2 8 (2 * (1) * 4)
8356 // D2 1 2 20 (1 * (1 * 5) * 4)
8357 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8358 const Expr *StrideExpr = OASE->getStride();
8359 llvm::Value *Stride =
8360 StrideExpr
8361 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8362 CGF.Int64Ty, /*isSigned=*/false)
8363 : nullptr;
8364 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8365 if (Stride)
8366 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8367 else
8368 CurStrides.push_back(DimProd);
8369 if (DI != DimSizes.end())
8370 ++DI;
8371 }
8372
8373 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8374 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8375 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8376 }
8377
8378 /// Return the adjusted map modifiers if the declaration a capture refers to
8379 /// appears in a first-private clause. This is expected to be used only with
8380 /// directives that start with 'target'.
8381 OpenMPOffloadMappingFlags
8382 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8383 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8384
8385 // A first private variable captured by reference will use only the
8386 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8387 // declaration is known as first-private in this handler.
8388 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8389 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8390 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8391 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8392 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8393 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8394 }
8395 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8396 if (I != LambdasMap.end())
8397 // For map(to: lambda): use the user-specified map type.
8398 return getMapTypeBits(
8399 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8400 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8401 /*AddPtrFlag=*/false,
8402 /*AddIsTargetParamFlag=*/false,
8403 /*isNonContiguous=*/false);
8404 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8405 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8406 }
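  // Example (sketch): for "#pragma omp target firstprivate(p)" with
  // "int *p", the capture yields OMP_MAP_TO | OMP_MAP_PTR_AND_OBJ; a
  // non-pointer firstprivate capture yields OMP_MAP_PRIVATE | OMP_MAP_TO;
  // captures that are neither firstprivate nor mapped lambdas default to
  // OMP_MAP_TO | OMP_MAP_FROM.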
8407
8408 void getPlainLayout(const CXXRecordDecl *RD,
8409 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8410 bool AsBase) const {
8411 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8412
8413 llvm::StructType *St =
8414 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8415
8416 unsigned NumElements = St->getNumElements();
8417 llvm::SmallVector<
8418 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8419 RecordLayout(NumElements);
8420
8421 // Fill bases.
8422 for (const auto &I : RD->bases()) {
8423 if (I.isVirtual())
8424 continue;
8425
8426 QualType BaseTy = I.getType();
8427 const auto *Base = BaseTy->getAsCXXRecordDecl();
8428 // Ignore empty bases.
8429 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8430 CGF.getContext()
8431 .getASTRecordLayout(Base)
8432 .getNonVirtualSize()
8433 .isZero())
8434 continue;
8435
8436 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8437 RecordLayout[FieldIndex] = Base;
8438 }
8439 // Fill in virtual bases.
8440 for (const auto &I : RD->vbases()) {
8441 QualType BaseTy = I.getType();
8442 // Ignore empty bases.
8443 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8444 continue;
8445
8446 const auto *Base = BaseTy->getAsCXXRecordDecl();
8447 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8448 if (RecordLayout[FieldIndex])
8449 continue;
8450 RecordLayout[FieldIndex] = Base;
8451 }
8452 // Fill in all the fields.
8453 assert(!RD->isUnion() && "Unexpected union.");
8454 for (const auto *Field : RD->fields()) {
8455 // Fill in non-bitfields. (Bitfields are skipped here; they are not
8456 // mapped individually.)
8457 if (!Field->isBitField() &&
8458 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8459 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8460 RecordLayout[FieldIndex] = Field;
8461 }
8462 }
8463 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8464 &Data : RecordLayout) {
8465 if (Data.isNull())
8466 continue;
8467 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8468 getPlainLayout(Base, Layout, /*AsBase=*/true);
8469 else
8470 Layout.push_back(cast<const FieldDecl *>(Data));
8471 }
8472 }
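  // Example (sketch): for
  //   struct B { int b; };
  //   struct D : B { int d; int : 3; double e; };
  // getPlainLayout on D recurses into the base B first and yields
  // [B::b, D::d, D::e] in increasing address order, skipping the unnamed
  // bit-field and any empty bases.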
8473
8474 /// Returns the address corresponding to \p PointerExpr.
8475 static Address getAttachPtrAddr(const Expr *PointerExpr,
8476 CodeGenFunction &CGF) {
8477 assert(PointerExpr && "Cannot get addr from null attach-ptr expr");
8478 Address AttachPtrAddr = Address::invalid();
8479
8480 if (auto *DRE = dyn_cast<DeclRefExpr>(PointerExpr)) {
8481 // If the pointer is a variable, we can use its address directly.
8482 AttachPtrAddr = CGF.EmitLValue(DRE).getAddress();
8483 } else if (auto *OASE = dyn_cast<ArraySectionExpr>(PointerExpr)) {
8484 AttachPtrAddr =
8485 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/true).getAddress();
8486 } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(PointerExpr)) {
8487 AttachPtrAddr = CGF.EmitLValue(ASE).getAddress();
8488 } else if (auto *ME = dyn_cast<MemberExpr>(PointerExpr)) {
8489 AttachPtrAddr = CGF.EmitMemberExpr(ME).getAddress();
8490 } else if (auto *UO = dyn_cast<UnaryOperator>(PointerExpr)) {
8491 assert(UO->getOpcode() == UO_Deref &&
8492 "Unexpected unary-operator on attach-ptr-expr");
8493 AttachPtrAddr = CGF.EmitLValue(UO).getAddress();
8494 }
8495 assert(AttachPtrAddr.isValid() &&
8496 "Failed to get address for attach pointer expression");
8497 return AttachPtrAddr;
8498 }
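  // Example (sketch): for an attach pointer written as "p" this returns &p;
  // for "s.p" the address of the member; for "pp[3]" or "*pp" the address
  // of the dereferenced lvalue. In every case the result is the address of
  // the pointer itself, not of its pointee.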
8499
8500 /// Get the address of the attach pointer, and a load from it, to get the
8501 /// pointee base address.
8502 /// \return A pair containing AttachPtrAddr and AttachPteeBaseAddr. The pair
8503 /// contains invalid addresses if \p AttachPtrExpr is null.
8504 static std::pair<Address, Address>
8505 getAttachPtrAddrAndPteeBaseAddr(const Expr *AttachPtrExpr,
8506 CodeGenFunction &CGF) {
8507
8508 if (!AttachPtrExpr)
8509 return {Address::invalid(), Address::invalid()};
8510
8511 Address AttachPtrAddr = getAttachPtrAddr(AttachPtrExpr, CGF);
8512 assert(AttachPtrAddr.isValid() && "Invalid attach pointer addr");
8513
8514 QualType AttachPtrType =
8515 AttachPtrExpr->getType().getNonReferenceType()
8516 .getCanonicalType();
8517
8518 Address AttachPteeBaseAddr = CGF.EmitLoadOfPointer(
8519 AttachPtrAddr, AttachPtrType->castAs<PointerType>());
8520 assert(AttachPteeBaseAddr.isValid() && "Invalid attach pointee base addr");
8521
8522 return {AttachPtrAddr, AttachPteeBaseAddr};
8523 }
8524
8525 /// Returns whether an attach entry should be emitted for a map on
8526 /// \p MapBaseDecl on the directive \p CurDir.
8527 static bool
8528 shouldEmitAttachEntry(const Expr *PointerExpr, const ValueDecl *MapBaseDecl,
8529 CodeGenFunction &CGF,
8530 llvm::PointerUnion<const OMPExecutableDirective *,
8531 const OMPDeclareMapperDecl *>
8532 CurDir) {
8533 if (!PointerExpr)
8534 return false;
8535
8536 // Pointer attachment is needed at map-entering time or for declare
8537 // mappers.
8538 return isa<const OMPDeclareMapperDecl *>(CurDir) ||
8539 isOpenMPTargetMapEnteringDirective(
8540 cast<const OMPExecutableDirective *>(CurDir)
8541 ->getDirectiveKind());
8542 }
8543
8544 /// Computes the attach-ptr expr for \p Components, and updates various maps
8545 /// with the information.
8546 /// It internally calls OMPClauseMappableExprCommon::findAttachPtrExpr()
8547 /// with the OpenMPDirectiveKind extracted from \p CurDir.
8548 /// It updates AttachPtrComputationOrderMap, AttachPtrComponentDepthMap, and
8549 /// AttachPtrExprMap.
8550 void collectAttachPtrExprInfo(
8551 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
8552 llvm::PointerUnion<const OMPExecutableDirective *,
8553 const OMPDeclareMapperDecl *>
8554 CurDir) {
8555
8556 OpenMPDirectiveKind CurDirectiveID =
8557 isa<const OMPDeclareMapperDecl *>(CurDir)
8558 ? OMPD_declare_mapper
8559 : cast<const OMPExecutableDirective *>(CurDir)->getDirectiveKind();
8560
8561 const auto &[AttachPtrExpr, Depth] =
8562 OMPClauseMappableExprCommon::findAttachPtrExpr(Components,
8563 CurDirectiveID);
8564
8565 AttachPtrComputationOrderMap.try_emplace(
8566 AttachPtrExpr, AttachPtrComputationOrderMap.size());
8567 AttachPtrComponentDepthMap.try_emplace(AttachPtrExpr, Depth);
8568 AttachPtrExprMap.try_emplace(Components, AttachPtrExpr);
8569 }
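  // Example (a sketch, assuming findAttachPtrExpr identifies the base
  // pointer of the section): for "map(tofrom: p[0:n])" the attach-ptr expr
  // is "p", and for "map(tofrom: s.p[0:n])" it is "s.p". Each distinct
  // attach-ptr expr is numbered in first-seen order in
  // AttachPtrComputationOrderMap, with its component depth recorded in
  // AttachPtrComponentDepthMap.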
8570
8571 /// Generate all the base pointers, section pointers, sizes, map types, and
8572 /// mappers for the extracted mappable expressions (all included in \a
8573 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8574 /// pair of the relevant declaration and index where it occurs is appended to
8575 /// the device pointers info array.
8576 void generateAllInfoForClauses(
8577 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8578 llvm::OpenMPIRBuilder &OMPBuilder,
8579 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8580 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8581 // We have to process the component lists that relate with the same
8582 // declaration in a single chunk so that we can generate the map flags
8583 // correctly. Therefore, we organize all lists in a map.
8584 enum MapKind { Present, Allocs, Other, Total };
8585 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8586 SmallVector<SmallVector<MapInfo, 8>, 4>>
8587 Info;
8588
8589 // Helper function to fill the information map for the different supported
8590 // clauses.
8591 auto &&InfoGen =
8592 [&Info, &SkipVarSet](
8593 const ValueDecl *D, MapKind Kind,
8594 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8595 OpenMPMapClauseKind MapType,
8596 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8597 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8598 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8599 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8600 if (SkipVarSet.contains(D))
8601 return;
8602 auto It = Info.try_emplace(D, Total).first;
8603 It->second[Kind].emplace_back(
8604 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8605 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8606 };
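      // Example (sketch): with "map(present, to: x) map(alloc: y)
      // map(from: z)", x is recorded under the Present kind, y under
      // Allocs, and z under Other, so present maps are processed first and
      // allocs before other maps for the same declaration.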
8607
8608 for (const auto *Cl : Clauses) {
8609 const auto *C = dyn_cast<OMPMapClause>(Cl);
8610 if (!C)
8611 continue;
8612 MapKind Kind = Other;
8613 if (llvm::is_contained(C->getMapTypeModifiers(),
8614 OMPC_MAP_MODIFIER_present))
8615 Kind = Present;
8616 else if (C->getMapType() == OMPC_MAP_alloc)
8617 Kind = Allocs;
8618 const auto *EI = C->getVarRefs().begin();
8619 for (const auto L : C->component_lists()) {
8620 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8621 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8622 C->getMapTypeModifiers(), {},
8623 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8624 E);
8625 ++EI;
8626 }
8627 }
8628 for (const auto *Cl : Clauses) {
8629 const auto *C = dyn_cast<OMPToClause>(Cl);
8630 if (!C)
8631 continue;
8632 MapKind Kind = Other;
8633 if (llvm::is_contained(C->getMotionModifiers(),
8634 OMPC_MOTION_MODIFIER_present))
8635 Kind = Present;
8636 const auto *EI = C->getVarRefs().begin();
8637 for (const auto L : C->component_lists()) {
8638 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8639 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8640 C->isImplicit(), std::get<2>(L), *EI);
8641 ++EI;
8642 }
8643 }
8644 for (const auto *Cl : Clauses) {
8645 const auto *C = dyn_cast<OMPFromClause>(Cl);
8646 if (!C)
8647 continue;
8648 MapKind Kind = Other;
8649 if (llvm::is_contained(C->getMotionModifiers(),
8650 OMPC_MOTION_MODIFIER_present))
8651 Kind = Present;
8652 const auto *EI = C->getVarRefs().begin();
8653 for (const auto L : C->component_lists()) {
8654 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8655 C->getMotionModifiers(),
8656 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8657 *EI);
8658 ++EI;
8659 }
8660 }
8661
8662 // Look at the use_device_ptr and use_device_addr clauses information and
8663 // mark the existing map entries as such. If there is no map information for
8664 // an entry in the use_device_ptr and use_device_addr list, we create one
8665 // with map type 'alloc' and zero size section. It is the user's fault if that
8666 // was not mapped before. If there is no map information and the pointer is
8667 // a struct member, then we defer the emission of that entry until the whole
8668 // struct has been processed.
8669 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8670 SmallVector<DeferredDevicePtrEntryTy, 4>>
8671 DeferredInfo;
8672 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8673
8674 auto &&UseDeviceDataCombinedInfoGen =
8675 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8676 CodeGenFunction &CGF, bool IsDevAddr) {
8677 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8678 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8679 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8680 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8681 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8682 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8683 UseDeviceDataCombinedInfo.Sizes.push_back(
8684 llvm::Constant::getNullValue(CGF.Int64Ty));
8685 UseDeviceDataCombinedInfo.Types.push_back(
8686 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8687 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8688 };
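      // Example (sketch): for "use_device_ptr(p)" where no enclosing map of
      // p was found, this records a zero-sized OMP_MAP_RETURN_PARAM entry
      // whose base and begin are the host value of p, which the runtime
      // later rewrites with the matching device pointer.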
8689
8690 auto &&MapInfoGen =
8691 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8692 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8693 OMPClauseMappableExprCommon::MappableExprComponentListRef
8694 Components,
8695 bool IsImplicit, bool IsDevAddr) {
8696 // We didn't find any match in our map information - generate a zero
8697 // size array section; if the pointer is a struct member we defer
8698 // this action until the whole struct has been processed.
8699 if (isa<MemberExpr>(IE)) {
8700 // Insert the pointer into Info to be processed by
8701 // generateInfoForComponentList. Because it is a member pointer
8702 // without a pointee, no entry will be generated for it, therefore
8703 // we need to generate one after the whole struct has been
8704 // processed. Nonetheless, generateInfoForComponentList must be
8705 // called to take the pointer into account for the calculation of
8706 // the range of the partial struct.
8707 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
8708 /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
8709 IsDevAddr);
8710 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8711 } else {
8712 llvm::Value *Ptr;
8713 if (IsDevAddr) {
8714 if (IE->isGLValue())
8715 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8716 else
8717 Ptr = CGF.EmitScalarExpr(IE);
8718 } else {
8719 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8720 }
8721 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
8722 }
8723 };
8724
8725 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8726 const Expr *IE, bool IsDevAddr) -> bool {
8727 // We potentially have map information for this declaration already.
8728 // Look for the first set of components that refer to it. If found,
8729 // return true.
8730 // If the first component is a member expression, we have to look into
8731 // 'this', which maps to null in the map of map information. Otherwise
8732 // look directly for the information.
8733 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8734 if (It != Info.end()) {
8735 bool Found = false;
8736 for (auto &Data : It->second) {
8737 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8738 return MI.Components.back().getAssociatedDeclaration() == VD;
8739 });
8740 // If we found a map entry, signal that the pointer has to be
8741 // returned and move on to the next declaration. Exclude cases where
8742 // the base pointer is mapped as array subscript, array section or
8743 // array shaping. The base address is passed as a pointer to base in
8744 // this case and cannot be used as a base for use_device_ptr list
8745 // item.
8746 if (CI != Data.end()) {
8747 if (IsDevAddr) {
8748 CI->ForDeviceAddr = IsDevAddr;
8749 CI->ReturnDevicePointer = true;
8750 Found = true;
8751 break;
8752 } else {
8753 auto PrevCI = std::next(CI->Components.rbegin());
8754 const auto *VarD = dyn_cast<VarDecl>(VD);
8755 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8756 isa<MemberExpr>(IE) ||
8757 !VD->getType().getNonReferenceType()->isPointerType() ||
8758 PrevCI == CI->Components.rend() ||
8759 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8760 VarD->hasLocalStorage()) {
8761 CI->ForDeviceAddr = IsDevAddr;
8762 CI->ReturnDevicePointer = true;
8763 Found = true;
8764 break;
8765 }
8766 }
8767 }
8768 }
8769 return Found;
8770 }
8771 return false;
8772 };
8773
8774 // Look at the use_device_ptr clause information and mark the existing map
8775 // entries as such. If there is no map information for an entry in the
8776 // use_device_ptr list, we create one with map type 'alloc' and zero size
8777 // section. It is the user's fault if that was not mapped before. If there is
8778 // no map information and the pointer is a struct member, then we defer the
8779 // emission of that entry until the whole struct has been processed.
8780 for (const auto *Cl : Clauses) {
8781 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8782 if (!C)
8783 continue;
8784 for (const auto L : C->component_lists()) {
8785 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8786 std::get<1>(L);
8787 assert(!Components.empty() &&
8788 "Not expecting empty list of components!");
8789 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8790 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8791 const Expr *IE = Components.back().getAssociatedExpression();
8792 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8793 continue;
8794 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8795 /*IsDevAddr=*/false);
8796 }
8797 }
8798
8799 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8800 for (const auto *Cl : Clauses) {
8801 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8802 if (!C)
8803 continue;
8804 for (const auto L : C->component_lists()) {
8805 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8806 std::get<1>(L);
8807 assert(!std::get<1>(L).empty() &&
8808 "Not expecting empty list of components!");
8809 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8810 if (!Processed.insert(VD).second)
8811 continue;
8812 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8813 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8814 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8815 continue;
8816 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8817 /*IsDevAddr=*/true);
8818 }
8819 }
8820
8821 for (const auto &Data : Info) {
8822 StructRangeInfoTy PartialStruct;
8823 // Current struct information:
8824 MapCombinedInfoTy CurInfo;
8825 // Current struct base information:
8826 MapCombinedInfoTy StructBaseCurInfo;
8827 const Decl *D = Data.first;
8828 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8829 bool HasMapBasePtr = false;
8830 bool HasMapArraySec = false;
8831 if (VD && VD->getType()->isAnyPointerType()) {
8832 for (const auto &M : Data.second) {
8833 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8834 return isa_and_present<DeclRefExpr>(L.VarRef);
8835 });
8836 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8837 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8838 L.VarRef);
8839 });
8840 if (HasMapBasePtr && HasMapArraySec)
8841 break;
8842 }
8843 }
8844 for (const auto &M : Data.second) {
8845 for (const MapInfo &L : M) {
8846 assert(!L.Components.empty() &&
8847 "Not expecting declaration with no component lists.");
8848
8849 // Remember the current base pointer index.
8850 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8851 unsigned StructBasePointersIdx =
8852 StructBaseCurInfo.BasePointers.size();
8853 CurInfo.NonContigInfo.IsNonContiguous =
8854 L.Components.back().isNonContiguous();
8855 generateInfoForComponentList(
8856 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8857 CurInfo, StructBaseCurInfo, PartialStruct,
8858 /*IsFirstComponentList=*/false, L.IsImplicit,
8859 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8860 L.VarRef, /*OverlappedElements*/ {},
8861 HasMapBasePtr && HasMapArraySec);
8862
8863 // If this entry relates to a device pointer, set the relevant
8864 // declaration and add the 'return pointer' flag.
8865 if (L.ReturnDevicePointer) {
8866 // Check whether a value was added to either CurInfo or
8867 // StructBaseCurInfo and error if no value was added to either of
8868 // them:
8869 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8870 StructBasePointersIdx <
8871 StructBaseCurInfo.BasePointers.size()) &&
8872 "Unexpected number of mapped base pointers.");
8873
8874 // Choose a base pointer index which is always valid:
8875 const ValueDecl *RelevantVD =
8876 L.Components.back().getAssociatedDeclaration();
8877 assert(RelevantVD &&
8878 "No relevant declaration related with device pointer??");
8879
8880 // If StructBaseCurInfo has been updated this iteration then work on
8881 // the first new entry added to it i.e. make sure that when multiple
8882 // values are added to any of the lists, the first value added is
8883 // being modified by the assignments below (not the last value
8884 // added).
8885 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8886 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8887 RelevantVD;
8888 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8889 L.ForDeviceAddr ? DeviceInfoTy::Address
8890 : DeviceInfoTy::Pointer;
8891 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8892 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8893 } else {
8894 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8895 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8896 L.ForDeviceAddr ? DeviceInfoTy::Address
8897 : DeviceInfoTy::Pointer;
8898 CurInfo.Types[CurrentBasePointersIdx] |=
8899 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8900 }
8901 }
8902 }
8903 }
8904
8905 // Append any pending zero-length pointers which are struct members and
8906 // used with use_device_ptr or use_device_addr.
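// For example (illustrative only), for 'struct S { int *p; } s;' with
// 'map(tofrom: s) use_device_ptr(s.p)', the entry for 's.p' was deferred
// until 's' had been processed and is emitted here with zero size, the
// RETURN_PARAM flag and a MEMBER_OF placeholder.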
8907 auto CI = DeferredInfo.find(Data.first);
8908 if (CI != DeferredInfo.end()) {
8909 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8910 llvm::Value *BasePtr;
8911 llvm::Value *Ptr;
8912 if (L.ForDeviceAddr) {
8913 if (L.IE->isGLValue())
8914 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8915 else
8916 Ptr = this->CGF.EmitScalarExpr(L.IE);
8917 BasePtr = Ptr;
8918 // Entry is RETURN_PARAM. Also, set the placeholder value
8919 // MEMBER_OF=FFFF so that the entry is later updated with the
8920 // correct value of MEMBER_OF.
8921 CurInfo.Types.push_back(
8922 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8923 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8924 } else {
8925 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8926 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8927 L.IE->getExprLoc());
8928 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8929 // placeholder value MEMBER_OF=FFFF so that the entry is later
8930 // updated with the correct value of MEMBER_OF.
8931 CurInfo.Types.push_back(
8932 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8933 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8934 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8935 }
8936 CurInfo.Exprs.push_back(L.VD);
8937 CurInfo.BasePointers.emplace_back(BasePtr);
8938 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8939 CurInfo.DevicePointers.emplace_back(
8940 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8941 CurInfo.Pointers.push_back(Ptr);
8942 CurInfo.Sizes.push_back(
8943 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8944 CurInfo.Mappers.push_back(nullptr);
8945 }
8946 }
8947
8948 // Unify entries in one list making sure the struct mapping precedes the
8949 // individual fields:
8950 MapCombinedInfoTy UnionCurInfo;
8951 UnionCurInfo.append(StructBaseCurInfo);
8952 UnionCurInfo.append(CurInfo);
8953
8954 // If there is an entry in PartialStruct it means we have a struct with
8955 // individual members mapped. Emit an extra combined entry.
8956 if (PartialStruct.Base.isValid()) {
8957 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8958 // Emit a combined entry:
8959 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8960 /*IsMapThis*/ !VD, OMPBuilder, VD);
8961 }
8962
8963 // We need to append the results of this capture to what we already have.
8964 CombinedInfo.append(UnionCurInfo);
8965 }
8966 // Append data for use_device_ptr clauses.
8967 CombinedInfo.append(UseDeviceDataCombinedInfo);
8968 }
8969
8970public:
8971 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8972 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {
8973 // Extract firstprivate clause information.
8974 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8975 for (const auto *D : C->varlist())
8976 FirstPrivateDecls.try_emplace(
8977 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8978 // Extract implicit firstprivates from uses_allocators clauses.
8979 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8980 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8981 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8982 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8983 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8984 /*Implicit=*/true);
8985 else if (const auto *VD = dyn_cast<VarDecl>(
8986 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8987 ->getDecl()))
8988 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8989 }
8990 }
8991 // Extract device pointer clause information.
8992 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8993 for (auto L : C->component_lists())
8994 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8995 // Extract device addr clause information.
8996 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8997 for (auto L : C->component_lists())
8998 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8999 // Extract map information.
9000 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
9001 if (C->getMapType() != OMPC_MAP_to)
9002 continue;
9003 for (auto L : C->component_lists()) {
9004 const ValueDecl *VD = std::get<0>(L);
9005 const auto *RD = VD ? VD->getType()
9006 .getCanonicalType()
9007 .getNonReferenceType()
9008 ->getAsCXXRecordDecl()
9009 : nullptr;
9010 if (RD && RD->isLambda())
9011 LambdasMap.try_emplace(std::get<0>(L), C);
9012 }
9013 }
9014 }
9015
9016 /// Constructor for the declare mapper directive.
9017 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
9018 : CurDir(&Dir), CGF(CGF), AttachPtrComparator(*this) {}
9019
9020 /// Generate code for the combined entry if we have a partially mapped struct
9021 /// and take care of the mapping flags of the arguments corresponding to
9022 /// individual struct members.
9023 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
9024 MapFlagsArrayTy &CurTypes,
9025 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
9026 llvm::OpenMPIRBuilder &OMPBuilder,
9027 const ValueDecl *VD = nullptr,
9028 unsigned OffsetForMemberOfFlag = 0,
9029 bool NotTargetParams = true) const {
9030 if (CurTypes.size() == 1 &&
9031 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
9032 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
9033 !PartialStruct.IsArraySection)
9034 return;
9035 Address LBAddr = PartialStruct.LowestElem.second;
9036 Address HBAddr = PartialStruct.HighestElem.second;
9037 if (PartialStruct.HasCompleteRecord) {
9038 LBAddr = PartialStruct.LB;
9039 HBAddr = PartialStruct.LB;
9040 }
9041 CombinedInfo.Exprs.push_back(VD);
9042 // Base is the base of the struct
9043 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9044 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9045 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9046 // Pointer is the address of the lowest element
9047 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
9048 const CXXMethodDecl *MD =
9049 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
9050 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
9051 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
9052 // There should not be a mapper for a combined entry.
9053 if (HasBaseClass) {
9054 // OpenMP 5.2 148:21:
9055 // If the target construct is within a class non-static member function,
9056 // and a variable is an accessible data member of the object for which the
9057 // non-static data member function is invoked, the variable is treated as
9058 // if the this[:1] expression had appeared in a map clause with a map-type
9059 // of tofrom.
9060 // Emit this[:1]
9061 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
9062 QualType Ty = MD->getFunctionObjectParameterType();
9063 llvm::Value *Size =
9064 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
9065 /*isSigned=*/true);
9066 CombinedInfo.Sizes.push_back(Size);
9067 } else {
9068 CombinedInfo.Pointers.push_back(LB);
9069 // Size is (addr of {highest+1} element) - (addr of lowest element)
9070 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
9071 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
9072 HBAddr.getElementType(), HB, /*Idx0=*/1);
9073 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
9074 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
9075 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
9076 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
9077 /*isSigned=*/false);
9078 CombinedInfo.Sizes.push_back(Size);
9079 }
9080 CombinedInfo.Mappers.push_back(nullptr);
9081 // Map type is always TARGET_PARAM when generating info for captures.
9082 CombinedInfo.Types.push_back(
9083 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
9084 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
9085 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
9086 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9087 // If any element has the present modifier, then make sure the runtime
9088 // doesn't attempt to allocate the struct.
9089 if (CurTypes.end() !=
9090 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9091 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9092 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9093 }))
9094 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
9095 // Remove TARGET_PARAM flag from the first element
9096 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9097 // If any element has the ompx_hold modifier, then make sure the runtime
9098 // uses the hold reference count for the struct as a whole so that it won't
9099 // be unmapped by an extra dynamic reference count decrement. Add it to all
9100 // elements as well so the runtime knows which reference count to check
9101 // when determining whether it's time for device-to-host transfers of
9102 // individual elements.
9103 if (CurTypes.end() !=
9104 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
9105 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9106 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
9107 })) {
9108 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9109 for (auto &M : CurTypes)
9110 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
9111 }
9112
9113 // All other current entries will be MEMBER_OF the combined entry
9114 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9115 // 0xFFFF in the MEMBER_OF field).
9116 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
9117 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
9118 for (auto &M : CurTypes)
9119 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
9120 }
9121
9122 /// Generate all the base pointers, section pointers, sizes, map types, and
9123 /// mappers for the extracted mappable expressions (all included in \a
9124 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9125 /// pair of the relevant declaration and index where it occurs is appended to
9126 /// the device pointers info array.
9127 void generateAllInfo(
9128 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9129 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9130 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9131 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9132 "Expect a executable directive");
9133 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9134 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
9135 SkipVarSet);
9136 }
9137
9138 /// Generate all the base pointers, section pointers, sizes, map types, and
9139 /// mappers for the extracted map clauses of user-defined mapper (all included
9140 /// in \a CombinedInfo).
9141 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
9142 llvm::OpenMPIRBuilder &OMPBuilder) const {
9143 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
9144 "Expect a declare mapper directive");
9145 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
9146 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
9147 OMPBuilder);
9148 }
9149
9150 /// Emit capture info for lambdas for variables captured by reference.
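/// For example (illustrative only), for a mapped lambda '[&x]() { ... }',
/// an implicit PTR_AND_OBJ | LITERAL | MEMBER_OF entry is emitted for the
/// captured reference so that the device copy of the lambda points at the
/// mapped 'x'.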
9151 void generateInfoForLambdaCaptures(
9152 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9153 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9154 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9155 const auto *RD = VDType->getAsCXXRecordDecl();
9156 if (!RD || !RD->isLambda())
9157 return;
9158 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9159 CGF.getContext().getDeclAlign(VD));
9160 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9161 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
9162 FieldDecl *ThisCapture = nullptr;
9163 RD->getCaptureFields(Captures, ThisCapture);
9164 if (ThisCapture) {
9165 LValue ThisLVal =
9166 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9167 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9168 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9169 VDLVal.getPointer(CGF));
9170 CombinedInfo.Exprs.push_back(VD);
9171 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9172 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9173 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9174 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9175 CombinedInfo.Sizes.push_back(
9176 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9177 CGF.Int64Ty, /*isSigned=*/true));
9178 CombinedInfo.Types.push_back(
9179 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9180 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9181 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9182 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9183 CombinedInfo.Mappers.push_back(nullptr);
9184 }
9185 for (const LambdaCapture &LC : RD->captures()) {
9186 if (!LC.capturesVariable())
9187 continue;
9188 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
9189 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9190 continue;
9191 auto It = Captures.find(VD);
9192 assert(It != Captures.end() && "Found lambda capture without field.");
9193 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9194 if (LC.getCaptureKind() == LCK_ByRef) {
9195 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9196 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9197 VDLVal.getPointer(CGF));
9198 CombinedInfo.Exprs.push_back(VD);
9199 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9200 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9201 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9202 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9203 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9204 CGF.getTypeSize(
9205 VD->getType().getCanonicalType().getNonReferenceType()),
9206 CGF.Int64Ty, /*isSigned=*/true));
9207 } else {
9208 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9209 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9210 VDLVal.getPointer(CGF));
9211 CombinedInfo.Exprs.push_back(VD);
9212 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9213 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9214 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9215 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9216 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9217 }
9218 CombinedInfo.Types.push_back(
9219 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9220 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9221 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9222 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9223 CombinedInfo.Mappers.push_back(nullptr);
9224 }
9225 }
9226
9227 /// Set correct indices for lambdas captures.
9228 void adjustMemberOfForLambdaCaptures(
9229 llvm::OpenMPIRBuilder &OMPBuilder,
9230 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9231 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9232 MapFlagsArrayTy &Types) const {
9233 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9234 // Set correct member_of idx for all implicit lambda captures.
9235 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
9236 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9237 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
9238 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
9239 continue;
9240 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
9241 assert(BasePtr && "Unable to find base lambda address.");
9242 int TgtIdx = -1;
9243 for (unsigned J = I; J > 0; --J) {
9244 unsigned Idx = J - 1;
9245 if (Pointers[Idx] != BasePtr)
9246 continue;
9247 TgtIdx = Idx;
9248 break;
9249 }
9250 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9251 // All other current entries will be MEMBER_OF the combined entry
9252 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9253 // 0xFFFF in the MEMBER_OF field).
9254 OpenMPOffloadMappingFlags MemberOfFlag =
9255 OMPBuilder.getMemberOfFlag(TgtIdx);
9256 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9257 }
9258 }
9259
9260 /// For a capture that has an associated clause, generate the base pointers,
9261 /// section pointers, sizes, map types, and mappers (all included in
9262 /// \a CurCaptureVarInfo).
9263 void generateInfoForCaptureFromClauseInfo(
9264 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
9265 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
9266 unsigned OffsetForMemberOfFlag) const {
9267 assert(!Cap->capturesVariableArrayType() &&
9268 "Not expecting to generate map info for a variable array type!");
9269
9270 // We need to know when we are generating information for the first component.
9271 const ValueDecl *VD = Cap->capturesThis()
9272 ? nullptr
9273 : Cap->getCapturedVar()->getCanonicalDecl();
9274
9275 // for map(to: lambda): skip here, processing it in
9276 // generateDefaultMapInfo
9277 if (LambdasMap.count(VD))
9278 return;
9279
9280 // If this declaration appears in an is_device_ptr clause we just have to
9281 // pass the pointer by value. If it is a reference to a declaration, we just
9282 // pass its value.
9283 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
9284 CurCaptureVarInfo.Exprs.push_back(VD);
9285 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
9286 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
9287 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
9288 CurCaptureVarInfo.Pointers.push_back(Arg);
9289 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9290 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9291 /*isSigned=*/true));
9292 CurCaptureVarInfo.Types.push_back(
9293 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9294 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
9295 CurCaptureVarInfo.Mappers.push_back(nullptr);
9296 return;
9297 }
9298
9299 MapDataArrayTy DeclComponentLists;
9300 // For member fields listed in is_device_ptr, store them in
9301 // DeclComponentLists for generating component info.
9302 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9303 auto It = DevPointersMap.find(VD);
9304 if (It != DevPointersMap.end())
9305 for (const auto &MCL : It->second)
9306 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9307 /*IsImplicit=*/true, nullptr,
9308 nullptr);
9309 auto I = HasDevAddrsMap.find(VD);
9310 if (I != HasDevAddrsMap.end())
9311 for (const auto &MCL : I->second)
9312 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9313 /*IsImplicit=*/true, nullptr,
9314 nullptr);
9315 assert(isa<const OMPExecutableDirective *>(CurDir) &&
9316 "Expect a executable directive");
9317 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
9318 bool HasMapBasePtr = false;
9319 bool HasMapArraySec = false;
9320 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9321 const auto *EI = C->getVarRefs().begin();
9322 for (const auto L : C->decl_component_lists(VD)) {
9323 const ValueDecl *VDecl, *Mapper;
9324 // The expression is not correct if the mapping is implicit.
9325 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9326 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9327 std::tie(VDecl, Components, Mapper) = L;
9328 assert(VDecl == VD && "We got information for the wrong declaration??");
9329 assert(!Components.empty() &&
9330 "Not expecting declaration with no component lists.");
9331 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
9332 HasMapBasePtr = true;
9333 if (VD && E && VD->getType()->isAnyPointerType() &&
9334 (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
9335 HasMapArraySec = true;
9336 DeclComponentLists.emplace_back(Components, C->getMapType(),
9337 C->getMapTypeModifiers(),
9338 C->isImplicit(), Mapper, E);
9339 ++EI;
9340 }
9341 }
9342 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9343 const MapData &RHS) {
9344 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9345 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9346 bool HasPresent =
9347 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9348 bool HasAllocs = MapType == OMPC_MAP_alloc;
9349 MapModifiers = std::get<2>(RHS);
9350 MapType = std::get<1>(LHS);
9351 bool HasPresentR =
9352 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9353 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9354 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9355 });
9356
9357 auto GenerateInfoForComponentLists =
9358 [&](ArrayRef<MapData> DeclComponentLists,
9359 bool IsEligibleForTargetParamFlag) {
9360 MapCombinedInfoTy CurInfoForComponentLists;
9361 StructRangeInfoTy PartialStruct;
9362
9363 if (DeclComponentLists.empty())
9364 return;
9365
9366 generateInfoForCaptureFromComponentLists(
9367 VD, DeclComponentLists, CurInfoForComponentLists, PartialStruct,
9368 IsEligibleForTargetParamFlag,
9369 /*AreBothBasePtrAndPteeMapped=*/HasMapBasePtr && HasMapArraySec);
9370
9371 // If there is an entry in PartialStruct it means we have a
9372 // struct with individual members mapped. Emit an extra combined
9373 // entry.
9374 if (PartialStruct.Base.isValid()) {
9375 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
9376 emitCombinedEntry(
9377 CurCaptureVarInfo, CurInfoForComponentLists.Types,
9378 PartialStruct, Cap->capturesThis(), OMPBuilder, nullptr,
9379 OffsetForMemberOfFlag,
9380 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
9381 }
9382
9383 // Return if we didn't add any entries.
9384 if (CurInfoForComponentLists.BasePointers.empty())
9385 return;
9386
9387 CurCaptureVarInfo.append(CurInfoForComponentLists);
9388 };
9389
9390 GenerateInfoForComponentLists(DeclComponentLists,
9391 /*IsEligibleForTargetParamFlag=*/true);
9392 }
9393
9394 /// Generate the base pointers, section pointers, sizes, map types, and
9395 /// mappers associated to \a DeclComponentLists for a given capture
9396 /// \a VD (all included in \a CurComponentListInfo).
9397 void generateInfoForCaptureFromComponentLists(
9398 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
9399 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
9400 bool IsListEligibleForTargetParamFlag,
9401 bool AreBothBasePtrAndPteeMapped = false) const {
9402 // Find overlapping elements (including the offset from the base element).
9403 llvm::SmallDenseMap<
9404 const MapData *,
9405 llvm::SmallVector<
9406 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9407 4>
9408 OverlappedData;
9409 size_t Count = 0;
9410 for (const MapData &L : DeclComponentLists) {
9411 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9412 OpenMPMapClauseKind MapType;
9413 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9414 bool IsImplicit;
9415 const ValueDecl *Mapper;
9416 const Expr *VarRef;
9417 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9418 L;
9419 ++Count;
9420 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
9421 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9422 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9423 VarRef) = L1;
9424 auto CI = Components.rbegin();
9425 auto CE = Components.rend();
9426 auto SI = Components1.rbegin();
9427 auto SE = Components1.rend();
9428 for (; CI != CE && SI != SE; ++CI, ++SI) {
9429 if (CI->getAssociatedExpression()->getStmtClass() !=
9430 SI->getAssociatedExpression()->getStmtClass())
9431 break;
9432 // Are we dealing with different variables/fields?
9433 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9434 break;
9435 }
9436 // We found an overlap if, for at least one of the lists, we reached
9437 // the head of its components list.
9438 if (CI == CE || SI == SE) {
9439 // Ignore it if it is the same component.
9440 if (CI == CE && SI == SE)
9441 continue;
9442 const auto It = (SI == SE) ? CI : SI;
9443 // If one component is a pointer and another one is a kind of
9444 // dereference of this pointer (array subscript, section, dereference,
9445 // etc.), it is not an overlap.
9446 // Same, if one component is a base and another component is a
9447 // dereferenced pointer memberexpr with the same base.
9448 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9449 (std::prev(It)->getAssociatedDeclaration() &&
9450 std::prev(It)
9451 ->getAssociatedDeclaration()
9452 ->getType()
9453 ->isPointerType()) ||
9454 (It->getAssociatedDeclaration() &&
9455 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9456 std::next(It) != CE && std::next(It) != SE))
9457 continue;
9458 const MapData &BaseData = CI == CE ? L : L1;
9459 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9460 SI == SE ? Components : Components1;
9461 OverlappedData[&BaseData].push_back(SubData);
9462 }
9463 }
9464 }
9465 // Sort the overlapped elements for each item.
9466 llvm::SmallVector<const FieldDecl *, 4> Layout;
9467 if (!OverlappedData.empty()) {
9468 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9469 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9470 while (BaseType != OrigType) {
9471 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9472 OrigType = BaseType->getPointeeOrArrayElementType();
9473 }
9474
9475 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9476 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9477 else {
9478 const auto *RD = BaseType->getAsRecordDecl();
9479 Layout.append(RD->field_begin(), RD->field_end());
9480 }
9481 }
9482 for (auto &Pair : OverlappedData) {
9483 llvm::stable_sort(
9484 Pair.getSecond(),
9485 [&Layout](
9486 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9487 OMPClauseMappableExprCommon::MappableExprComponentListRef
9488 Second) {
9489 auto CI = First.rbegin();
9490 auto CE = First.rend();
9491 auto SI = Second.rbegin();
9492 auto SE = Second.rend();
9493 for (; CI != CE && SI != SE; ++CI, ++SI) {
9494 if (CI->getAssociatedExpression()->getStmtClass() !=
9495 SI->getAssociatedExpression()->getStmtClass())
9496 break;
9497 // Are we dealing with different variables/fields?
9498 if (CI->getAssociatedDeclaration() !=
9499 SI->getAssociatedDeclaration())
9500 break;
9501 }
9502
9503 // Lists contain the same elements.
9504 if (CI == CE && SI == SE)
9505 return false;
9506
9507 // A list with fewer elements is less than a list with more elements.
9508 if (CI == CE || SI == SE)
9509 return CI == CE;
9510
9511 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9512 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9513 if (FD1->getParent() == FD2->getParent())
9514 return FD1->getFieldIndex() < FD2->getFieldIndex();
9515 const auto *It =
9516 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9517 return FD == FD1 || FD == FD2;
9518 });
9519 return *It == FD1;
9520 });
9521 }
9522
9523 // Associated with a capture, because the mapping flags depend on it.
9524 // First, go through all of the elements that have overlapped elements.
9525 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
9526 MapCombinedInfoTy StructBaseCombinedInfo;
9527 for (const auto &Pair : OverlappedData) {
9528 const MapData &L = *Pair.getFirst();
9529 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9530 OpenMPMapClauseKind MapType;
9531 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9532 bool IsImplicit;
9533 const ValueDecl *Mapper;
9534 const Expr *VarRef;
9535 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9536 L;
9537 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9538 OverlappedComponents = Pair.getSecond();
9539 generateInfoForComponentList(
9540 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9541 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag, IsImplicit,
9542 /*GenerateAllInfoForClauses*/ false, Mapper,
9543 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9544 AddTargetParamFlag = false;
9545 }
9546 // Go through other elements without overlapped elements.
9547 for (const MapData &L : DeclComponentLists) {
9548 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9549 OpenMPMapClauseKind MapType;
9550 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9551 bool IsImplicit;
9552 const ValueDecl *Mapper;
9553 const Expr *VarRef;
9554 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9555 L;
9556 auto It = OverlappedData.find(&L);
9557 if (It == OverlappedData.end())
9558 generateInfoForComponentList(
9559 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9560 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag,
9561 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
9562 /*ForDeviceAddr=*/false, VD, VarRef,
9563 /*OverlappedElements*/ {}, AreBothBasePtrAndPteeMapped);
9564 AddTargetParamFlag = false;
9565 }
9566 }
9567
9568 /// Generate the default map information for a given capture \a CI,
9569 /// record field declaration \a RI and captured value \a CV.
9570 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9571 const FieldDecl &RI, llvm::Value *CV,
9572 MapCombinedInfoTy &CombinedInfo) const {
9573 bool IsImplicit = true;
9574 // Do the default mapping.
9575 if (CI.capturesThis()) {
9576 CombinedInfo.Exprs.push_back(nullptr);
9577 CombinedInfo.BasePointers.push_back(CV);
9578 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9579 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9580 CombinedInfo.Pointers.push_back(CV);
9581 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9582 CombinedInfo.Sizes.push_back(
9583 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9584 CGF.Int64Ty, /*isSigned=*/true));
9585 // Default map type.
9586 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
9587 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
9588 } else if (CI.capturesVariableByCopy()) {
9589 const VarDecl *VD = CI.getCapturedVar();
9590 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9591 CombinedInfo.BasePointers.push_back(CV);
9592 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9593 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9594 CombinedInfo.Pointers.push_back(CV);
9595 if (!RI.getType()->isAnyPointerType()) {
9596 // We have to signal to the runtime captures passed by value that are
9597 // not pointers.
9598 CombinedInfo.Types.push_back(
9599 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
9600 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9601 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9602 } else {
9603 // Pointers are implicitly mapped with a zero size and no flags
9604 // (other than first map that is added for all implicit maps).
9605 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
9606 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9607 }
9608 auto I = FirstPrivateDecls.find(VD);
9609 if (I != FirstPrivateDecls.end())
9610 IsImplicit = I->getSecond();
9611 } else {
9612 assert(CI.capturesVariable() && "Expected captured reference.");
9613 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9614 QualType ElementType = PtrTy->getPointeeType();
9615 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9616 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9617 // The default map type for a scalar/complex type is 'to' because by
9618 // default the value doesn't have to be retrieved. For an aggregate
9619 // type, the default is 'tofrom'.
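// E.g. a captured 'double' is mapped 'to' by default, while a captured
// 'struct S' is mapped 'tofrom'.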
9620 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9621 const VarDecl *VD = CI.getCapturedVar();
9622 auto I = FirstPrivateDecls.find(VD);
9623 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9624 CombinedInfo.BasePointers.push_back(CV);
9625 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9626 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9627 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9628 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9629 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9630 AlignmentSource::Decl));
9631 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
9632 } else {
9633 CombinedInfo.Pointers.push_back(CV);
9634 }
9635 if (I != FirstPrivateDecls.end())
9636 IsImplicit = I->getSecond();
9637 }
9638 // Every default map produces a single argument which is a target parameter.
9639 CombinedInfo.Types.back() |=
9640 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9641
9642 // Add flag stating this is an implicit map.
9643 if (IsImplicit)
9644 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
9645
9646 // No user-defined mapper for default mapping.
9647 CombinedInfo.Mappers.push_back(nullptr);
9648 }
9649};
9650} // anonymous namespace
9651
9652// Try to extract the base declaration from a `this->x` expression if possible.
9653 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9654 if (!E)
9655 return nullptr;
9656
9657 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
9658 if (const MemberExpr *ME =
9659 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9660 return ME->getMemberDecl();
9661 return nullptr;
9662}
9663
9664/// Emit a string constant containing the names of the values mapped to the
9665/// offloading runtime library.
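/// The result has the ";file;name;line;column;;" layout produced by
/// getOrCreateSrcLocStr, e.g. ";t.c;arr;3;12;;" (illustrative) for a map of
/// 'arr' at t.c:3:12.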
9666static llvm::Constant *
9667emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9668 MappableExprsHandler::MappingExprInfo &MapExprs) {
9669
9670 uint32_t SrcLocStrSize;
9671 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9672 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9673
9674 SourceLocation Loc;
9675 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9676 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9677 Loc = VD->getLocation();
9678 else
9679 Loc = MapExprs.getMapExpr()->getExprLoc();
9680 } else {
9681 Loc = MapExprs.getMapDecl()->getLocation();
9682 }
9683
9684 std::string ExprName;
9685 if (MapExprs.getMapExpr()) {
9686 PrintingPolicy P(CGF.getContext().getLangOpts());
9687 llvm::raw_string_ostream OS(ExprName);
9688 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9689 } else {
9690 ExprName = MapExprs.getMapDecl()->getNameAsString();
9691 }
9692
9693 std::string FileName;
9694 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9695 if (auto *DbgInfo = CGF.getDebugInfo())
9696 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
9697 else
9698 FileName = PLoc.getFilename();
9699 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
9700 PLoc.getColumn(), SrcLocStrSize);
9701}
9702/// Emit the arrays used to pass the captures and map information to the
9703/// offloading runtime library. If there is no map or capture information,
9704/// return nullptr by reference.
9705 static void emitOffloadingArraysAndArgs(
9706 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9707 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9708 bool IsNonContiguous = false, bool ForEndCall = false) {
9709 CodeGenModule &CGM = CGF.CGM;
9710
9711 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
9712 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
9713 CGF.AllocaInsertPt->getIterator());
9714 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
9715 CGF.Builder.GetInsertPoint());
9716
9717 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
9718 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
9719 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
9720 }
9721 };
9722
9723 auto CustomMapperCB = [&](unsigned int I) {
9724 llvm::Function *MFunc = nullptr;
9725 if (CombinedInfo.Mappers[I]) {
9726 Info.HasMapper = true;
9727 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9728 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9729 }
9730 return MFunc;
9731 };
9732 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
9733 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
9734 IsNonContiguous, ForEndCall, DeviceAddrCB));
9735}
9736
9737/// Check for inner distribute directive.
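/// For example, '#pragma omp target' whose body is
/// '#pragma omp teams distribute ...' yields the nested distribute
/// directive.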
9738static const OMPExecutableDirective *
9739 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9740 const auto *CS = D.getInnermostCapturedStmt();
9741 const auto *Body =
9742 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9743 const Stmt *ChildStmt =
9744 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9745
9746 if (const auto *NestedDir =
9747 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9748 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9749 switch (D.getDirectiveKind()) {
9750 case OMPD_target:
9751 // For now, treat 'target' with nested 'teams loop' as if it's
9752 // distributed (target teams distribute).
9753 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
9754 return NestedDir;
9755 if (DKind == OMPD_teams) {
9756 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9757 /*IgnoreCaptured=*/true);
9758 if (!Body)
9759 return nullptr;
9760 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9761 if (const auto *NND =
9762 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9763 DKind = NND->getDirectiveKind();
9764 if (isOpenMPDistributeDirective(DKind))
9765 return NND;
9766 }
9767 }
9768 return nullptr;
9769 case OMPD_target_teams:
9770 if (isOpenMPDistributeDirective(DKind))
9771 return NestedDir;
9772 return nullptr;
9773 case OMPD_target_parallel:
9774 case OMPD_target_simd:
9775 case OMPD_target_parallel_for:
9776 case OMPD_target_parallel_for_simd:
9777 return nullptr;
9778 case OMPD_target_teams_distribute:
9779 case OMPD_target_teams_distribute_simd:
9780 case OMPD_target_teams_distribute_parallel_for:
9781 case OMPD_target_teams_distribute_parallel_for_simd:
9782 case OMPD_parallel:
9783 case OMPD_for:
9784 case OMPD_parallel_for:
9785 case OMPD_parallel_master:
9786 case OMPD_parallel_sections:
9787 case OMPD_for_simd:
9788 case OMPD_parallel_for_simd:
9789 case OMPD_cancel:
9790 case OMPD_cancellation_point:
9791 case OMPD_ordered:
9792 case OMPD_threadprivate:
9793 case OMPD_allocate:
9794 case OMPD_task:
9795 case OMPD_simd:
9796 case OMPD_tile:
9797 case OMPD_unroll:
9798 case OMPD_sections:
9799 case OMPD_section:
9800 case OMPD_single:
9801 case OMPD_master:
9802 case OMPD_critical:
9803 case OMPD_taskyield:
9804 case OMPD_barrier:
9805 case OMPD_taskwait:
9806 case OMPD_taskgroup:
9807 case OMPD_atomic:
9808 case OMPD_flush:
9809 case OMPD_depobj:
9810 case OMPD_scan:
9811 case OMPD_teams:
9812 case OMPD_target_data:
9813 case OMPD_target_exit_data:
9814 case OMPD_target_enter_data:
9815 case OMPD_distribute:
9816 case OMPD_distribute_simd:
9817 case OMPD_distribute_parallel_for:
9818 case OMPD_distribute_parallel_for_simd:
9819 case OMPD_teams_distribute:
9820 case OMPD_teams_distribute_simd:
9821 case OMPD_teams_distribute_parallel_for:
9822 case OMPD_teams_distribute_parallel_for_simd:
9823 case OMPD_target_update:
9824 case OMPD_declare_simd:
9825 case OMPD_declare_variant:
9826 case OMPD_begin_declare_variant:
9827 case OMPD_end_declare_variant:
9828 case OMPD_declare_target:
9829 case OMPD_end_declare_target:
9830 case OMPD_declare_reduction:
9831 case OMPD_declare_mapper:
9832 case OMPD_taskloop:
9833 case OMPD_taskloop_simd:
9834 case OMPD_master_taskloop:
9835 case OMPD_master_taskloop_simd:
9836 case OMPD_parallel_master_taskloop:
9837 case OMPD_parallel_master_taskloop_simd:
9838 case OMPD_requires:
9839 case OMPD_metadirective:
9840 case OMPD_unknown:
9841 default:
9842 llvm_unreachable("Unexpected directive.");
9843 }
9844 }
9845
9846 return nullptr;
9847}
9848
9849/// Emit the user-defined mapper function. The code generation follows the
9850/// pattern in the example below.
9851/// \code
9852/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9853/// void *base, void *begin,
9854/// int64_t size, int64_t type,
9855/// void *name = nullptr) {
9856/// // Allocate space for an array section first or add a base/begin for
9857/// // pointer dereference.
9858/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9859/// !maptype.IsDelete)
9860/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9861/// size*sizeof(Ty), clearToFromMember(type));
9862/// // Map members.
9863/// for (unsigned i = 0; i < size; i++) {
9864/// // For each component specified by this mapper:
9865/// for (auto c : begin[i]->all_components) {
9866/// if (c.hasMapper())
9867/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9868/// c.arg_type, c.arg_name);
9869/// else
9870/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9871/// c.arg_begin, c.arg_size, c.arg_type,
9872/// c.arg_name);
9873/// }
9874/// }
9875/// // Delete the array section.
9876/// if (size > 1 && maptype.IsDelete)
9877/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9878/// size*sizeof(Ty), clearToFromMember(type));
9879/// }
9880/// \endcode
9881 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9882 CodeGenFunction *CGF) {
9883 if (UDMMap.count(D) > 0)
9884 return;
9885 ASTContext &C = CGM.getContext();
9886 QualType Ty = D->getType();
9887 auto *MapperVarDecl =
9888 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9889 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9890 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9891
9892 CodeGenFunction MapperCGF(CGM);
9893 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9894 auto PrivatizeAndGenMapInfoCB =
9895 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
9896 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
9897 MapperCGF.Builder.restoreIP(CodeGenIP);
9898
9899 // Privatize the declared variable of mapper to be the current array
9900 // element.
9901 Address PtrCurrent(
9902 PtrPHI, ElemTy,
9903 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
9904 .getAlignment()
9905 .alignmentOfArrayElement(ElementSize));
9906 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9907 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9908 (void)Scope.Privatize();
9909
9910 // Get map clause information.
9911 MappableExprsHandler MEHandler(*D, MapperCGF);
9912 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
9913
9914 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9915 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
9916 };
9917 if (CGM.getCodeGenOpts().getDebugInfo() !=
9918 llvm::codegenoptions::NoDebugInfo) {
9919 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9920 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9921 FillInfoMap);
9922 }
9923
9924 return CombinedInfo;
9925 };
9926
9927 auto CustomMapperCB = [&](unsigned I) {
9928 llvm::Function *MapperFunc = nullptr;
9929 if (CombinedInfo.Mappers[I]) {
9930 // Call the corresponding mapper function.
9931 MapperFunc = getOrCreateUserDefinedMapperFunc(
9932 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9933 assert(MapperFunc && "Expect a valid mapper function is available.");
9934 }
9935 return MapperFunc;
9936 };
9937
9938 SmallString<64> TyStr;
9939 llvm::raw_svector_ostream Out(TyStr);
9940 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9941 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9942
9943 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
9944 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
9945 UDMMap.try_emplace(D, NewFn);
9946 if (CGF)
9947 FunctionUDMMap[CGF->CurFn].push_back(D);
9948}
9949
9950 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9951 const OMPDeclareMapperDecl *D) {
9952 auto I = UDMMap.find(D);
9953 if (I != UDMMap.end())
9954 return I->second;
9955 emitUserDefinedMapper(D);
9956 return UDMMap.lookup(D);
9957}
9958
9959 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9960 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9961 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9962 const OMPLoopDirective &D)>
9963 SizeEmitter) {
9964 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9965 const OMPExecutableDirective *TD = &D;
9966 // Get nested teams distribute kind directive, if any. For now, treat
9967 // 'target_teams_loop' as if it's really a target_teams_distribute.
9968 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9969 Kind != OMPD_target_teams_loop)
9970 TD = getNestedDistributeDirective(CGM.getContext(), D);
9971 if (!TD)
9972 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9973
9974 const auto *LD = cast<OMPLoopDirective>(TD);
9975 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9976 return NumIterations;
9977 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9978}
9979
9980static void
9981emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9982 const OMPExecutableDirective &D,
9983 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9984 bool RequiresOuterTask, const CapturedStmt &CS,
9985 bool OffloadingMandatory, CodeGenFunction &CGF) {
9986 if (OffloadingMandatory) {
9987 CGF.Builder.CreateUnreachable();
9988 } else {
9989 if (RequiresOuterTask) {
9990 CapturedVars.clear();
9991 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9992 }
9993 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9994 CapturedVars);
9995 }
9996}
9997
9998static llvm::Value *emitDeviceID(
9999 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10000 CodeGenFunction &CGF) {
10001 // Emit device ID if any.
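// E.g. 'device(device_num: 2)' yields an i64 value of 2; without a device
// clause the OMP_DEVICEID_UNDEF sentinel is used instead.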
10002 llvm::Value *DeviceID;
10003 if (Device.getPointer()) {
10004 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10005 Device.getInt() == OMPC_DEVICE_device_num) &&
10006 "Expected device_num modifier.");
10007 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10008 DeviceID =
10009 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10010 } else {
10011 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10012 }
10013 return DeviceID;
10014}
10015
10016static std::pair<llvm::Value *, OMPDynGroupprivateFallbackType>
10017 emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF) {
10018 llvm::Value *DynGP = CGF.Builder.getInt32(0);
10019 auto DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10020
10021 if (auto *DynGPClause = D.getSingleClause<OMPDynGroupprivateClause>()) {
10022 CodeGenFunction::RunCleanupsScope DynGPScope(CGF);
10023 llvm::Value *DynGPVal =
10024 CGF.EmitScalarExpr(DynGPClause->getSize(), /*IgnoreResultAssign=*/true);
10025 DynGP = CGF.Builder.CreateIntCast(DynGPVal, CGF.Int32Ty,
10026 /*isSigned=*/false);
10027 auto FallbackModifier = DynGPClause->getDynGroupprivateFallbackModifier();
10028 switch (FallbackModifier) {
10029 case OMPC_DYN_GROUPPRIVATE_FALLBACK_abort:
10030 DynGPFallback = OMPDynGroupprivateFallbackType::Abort;
10031 break;
10032 case OMPC_DYN_GROUPPRIVATE_FALLBACK_null:
10033 DynGPFallback = OMPDynGroupprivateFallbackType::Null;
10034 break;
10035 case OMPC_DYN_GROUPPRIVATE_FALLBACK_default_mem:
10036 case OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown:
10037 // This is the default for dyn_groupprivate.
10038 DynGPFallback = OMPDynGroupprivateFallbackType::DefaultMem;
10039 break;
10040 default:
10041 llvm_unreachable("Unknown fallback modifier for OpenMP dyn_groupprivate");
10042 }
10043 } else if (auto *OMPXDynCGClause =
10044 D.getSingleClause<OMPXDynCGroupMemClause>()) {
10045 CodeGenFunction::RunCleanupsScope DynCGMemScope(CGF);
10046 llvm::Value *DynCGMemVal = CGF.EmitScalarExpr(OMPXDynCGClause->getSize(),
10047 /*IgnoreResultAssign=*/true);
10048 DynGP = CGF.Builder.CreateIntCast(DynCGMemVal, CGF.Int32Ty,
10049 /*isSigned=*/false);
10050 }
10051 return {DynGP, DynGPFallback};
10052}
10053
10054 static void genMapInfoForCaptures(
10055 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10056 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10057 llvm::OpenMPIRBuilder &OMPBuilder,
10058 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
10059 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10060
10061 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10062 auto RI = CS.getCapturedRecordDecl()->field_begin();
10063 auto *CV = CapturedVars.begin();
10064 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10065 CE = CS.capture_end();
10066 CI != CE; ++CI, ++RI, ++CV) {
10067 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10068
10069 // VLA sizes are passed to the outlined region by copy and do not have map
10070 // information associated.
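// E.g. for a captured VLA 'int a[n]', the size value 'n' is passed as an
// implicit LITERAL target parameter with no map entry of its own.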
10071 if (CI->capturesVariableArrayType()) {
10072 CurInfo.Exprs.push_back(nullptr);
10073 CurInfo.BasePointers.push_back(*CV);
10074 CurInfo.DevicePtrDecls.push_back(nullptr);
10075 CurInfo.DevicePointers.push_back(
10076 MappableExprsHandler::DeviceInfoTy::None);
10077 CurInfo.Pointers.push_back(*CV);
10078 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10079 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10080 // Copy to the device as an argument. No need to retrieve it.
10081 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10082 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10083 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10084 CurInfo.Mappers.push_back(nullptr);
10085 } else {
10086 // If we have any information in the map clause, we use it, otherwise we
10087 // just do a default mapping.
10088 MEHandler.generateInfoForCaptureFromClauseInfo(
10089 CI, *CV, CurInfo, OMPBuilder,
10090 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
10091
10092 if (!CI->capturesThis())
10093 MappedVarSet.insert(CI->getCapturedVar());
10094 else
10095 MappedVarSet.insert(nullptr);
10096
10097 if (CurInfo.BasePointers.empty())
10098 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10099
10100 // Generate correct mapping for variables captured by reference in
10101 // lambdas.
10102 if (CI->capturesVariable())
10103 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10104 CurInfo, LambdaPointers);
10105 }
10106 // We expect to have at least an element of information for this capture.
10107 assert(!CurInfo.BasePointers.empty() &&
10108 "Non-existing map pointer for capture!");
10109 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10110 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10111 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10112 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10113 "Inconsistent map information sizes!");
10114
10115 // We need to append the results of this capture to what we already have.
10116 CombinedInfo.append(CurInfo);
10117 }
10118 // Adjust MEMBER_OF flags for the lambdas captures.
10119 MEHandler.adjustMemberOfForLambdaCaptures(
10120 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
10121 CombinedInfo.Pointers, CombinedInfo.Types);
10122}
10123static void
10124genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
10125 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
10126 llvm::OpenMPIRBuilder &OMPBuilder,
10127 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
10128 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
10129
10130 CodeGenModule &CGM = CGF.CGM;
10131 // Map any list items in a map clause that were not captures because they
10132 // weren't referenced within the construct.
10133 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
10134
10135 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10136 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10137 };
10138 if (CGM.getCodeGenOpts().getDebugInfo() !=
10139 llvm::codegenoptions::NoDebugInfo) {
10140 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10141 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10142 FillInfoMap);
10143 }
10144}
10145
10147 const CapturedStmt &CS,
10148 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10149 llvm::OpenMPIRBuilder &OMPBuilder,
10150 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
10151 // Get mappable expression information.
10152 MappableExprsHandler MEHandler(D, CGF);
10153 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10154
10155 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
10156 MappedVarSet, CombinedInfo);
10157 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
10158}
10159
10160template <typename ClauseTy>
10161static void
10162 emitClauseForBareTargetDirective(CodeGenFunction &CGF,
10163 const OMPExecutableDirective &D,
10164 llvm::SmallVectorImpl<llvm::Value *> &Values) {
10165 const auto *C = D.getSingleClause<ClauseTy>();
10166 assert(!C->varlist_empty() &&
10167 "ompx_bare requires explicit num_teams and thread_limit");
10169 for (auto *E : C->varlist()) {
10170 llvm::Value *V = CGF.EmitScalarExpr(E);
10171 Values.push_back(
10172 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
10173 }
10174}
10175
10176 static void emitTargetCallKernelLaunch(
10177 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10178 const OMPExecutableDirective &D,
10179 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
10180 const CapturedStmt &CS, bool OffloadingMandatory,
10181 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10182 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
10183 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
10184 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10185 const OMPLoopDirective &D)>
10186 SizeEmitter,
10187 CodeGenFunction &CGF, CodeGenModule &CGM) {
10188 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
10189
10190 // Fill up the arrays with all the captured variables.
10191 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10192 CGOpenMPRuntime::TargetDataInfo Info;
10193 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
10194
10195 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10196 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10197
10198 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10199 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10200 CGF.VoidPtrTy, CGM.getPointerAlign());
10201 InputInfo.PointersArray =
10202 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10203 InputInfo.SizesArray =
10204 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10205 InputInfo.MappersArray =
10206 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10207 MapTypesArray = Info.RTArgs.MapTypesArray;
10208 MapNamesArray = Info.RTArgs.MapNamesArray;
10209
10210 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
10211 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10212 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
10213 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10214 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
10215
10216 if (IsReverseOffloading) {
10217 // Reverse offloading is not supported, so just execute on the host.
10218 // FIXME: This fallback solution is incorrect since it ignores the
10219 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
10220 // assert here and ensure SEMA emits an error.
10221 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10222 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10223 return;
10224 }
10225
10226 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
10227 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
10228
10229 llvm::Value *BasePointersArray =
10230 InputInfo.BasePointersArray.emitRawPointer(CGF);
10231 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
10232 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
10233 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
10234
10235 auto &&EmitTargetCallFallbackCB =
10236 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10237 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
10238 -> llvm::OpenMPIRBuilder::InsertPointTy {
10239 CGF.Builder.restoreIP(IP);
10240 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10241 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10242 return CGF.Builder.saveIP();
10243 };
10244
10245 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
10246 SmallVector<llvm::Value *, 3> NumTeams;
10247 SmallVector<llvm::Value *, 3> NumThreads;
10248 if (IsBare) {
10249 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
10250 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
10251 NumThreads);
10252 } else {
10253 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
10254 NumThreads.push_back(
10255 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
10256 }
10257
10258 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
10259 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
10260 llvm::Value *NumIterations =
10261 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
10262 auto [DynCGroupMem, DynCGroupMemFallback] = emitDynCGroupMem(D, CGF);
10263 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
10264 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
10265
10266 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
10267 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
10268 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
10269
10270 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
10271 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
10272 DynCGroupMem, HasNoWait, DynCGroupMemFallback);
10273
10274 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10275 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
10276 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
10277 RTLoc, AllocaIP));
10278 CGF.Builder.restoreIP(AfterIP);
10279 };
10280
10281 if (RequiresOuterTask)
10282 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10283 else
10284 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10285}
10286
10287static void
10288emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
10289 const OMPExecutableDirective &D,
10290 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
10291 bool RequiresOuterTask, const CapturedStmt &CS,
10292 bool OffloadingMandatory, CodeGenFunction &CGF) {
10293
10294 // Notify that the host version must be executed.
10295 auto &&ElseGen =
10296 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10297 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10298 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
10299 RequiresOuterTask, CS, OffloadingMandatory, CGF);
10300 };
10301
10302 if (RequiresOuterTask) {
10303 CodeGenFunction::OMPTargetDataInfo InputInfo;
10304 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10305 } else {
10306 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10307 }
10308}
10309
10310void CGOpenMPRuntime::emitTargetCall(
10311 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10312 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10313 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10314 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10315 const OMPLoopDirective &D)>
10316 SizeEmitter) {
10317 if (!CGF.HaveInsertPoint())
10318 return;
10319
10320 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
10321 CGM.getLangOpts().OpenMPOffloadMandatory;
10322
10323 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
10324
10325 const bool RequiresOuterTask =
10326 D.hasClausesOfKind<OMPDependClause>() ||
10327 D.hasClausesOfKind<OMPNowaitClause>() ||
10328 D.hasClausesOfKind<OMPInReductionClause>() ||
10329 (CGM.getLangOpts().OpenMP >= 51 &&
10330 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
10331 D.hasClausesOfKind<OMPThreadLimitClause>());
10332 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10333 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10334 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10335 PrePostActionTy &) {
10336 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10337 };
10338 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10339
10340 CodeGenFunction::OMPTargetDataInfo InputInfo;
10341 llvm::Value *MapTypesArray = nullptr;
10342 llvm::Value *MapNamesArray = nullptr;
10343
10344 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
10345 RequiresOuterTask, &CS, OffloadingMandatory, Device,
10346 OutlinedFnID, &InputInfo, &MapTypesArray,
10347 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
10348 PrePostActionTy &) {
10349 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
10350 RequiresOuterTask, CS, OffloadingMandatory,
10351 Device, OutlinedFnID, InputInfo, MapTypesArray,
10352 MapNamesArray, SizeEmitter, CGF, CGM);
10353 };
10354
10355 auto &&TargetElseGen =
10356 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
10357 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
10358 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
10359 CS, OffloadingMandatory, CGF);
10360 };
10361
10362 // If we have a target function ID it means that we need to support
10363 // offloading; otherwise, just execute on the host. We need to execute on the
10364 // host regardless of the conditional in the if clause if, e.g., the user does
10365 // not specify target triples.
10366 if (OutlinedFnID) {
10367 if (IfCond) {
10368 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10369 } else {
10370 RegionCodeGenTy ThenRCG(TargetThenGen);
10371 ThenRCG(CGF);
10372 }
10373 } else {
10374 RegionCodeGenTy ElseRCG(TargetElseGen);
10375 ElseRCG(CGF);
10376 }
10377}
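// Illustrative example (not from the source): for
//   #pragma omp target if(n > 64) device(dev) map(tofrom: a[0:n])
// emitIfClause branches on 'n > 64' between TargetThenGen (kernel launch,
// with emitTargetCallFallback as the failure path) and TargetElseGen (host
// execution). When no target triples are given there is no OutlinedFnID,
// so only the else path is emitted.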
10378
10379void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10380 StringRef ParentName) {
10381 if (!S)
10382 return;
10383
10384 // Codegen OMP target directives that offload compute to the device.
10385 bool RequiresDeviceCodegen =
10386 isa<OMPExecutableDirective>(S) &&
10387 isOpenMPTargetExecutionDirective(
10388 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10389
10390 if (RequiresDeviceCodegen) {
10391 const auto &E = *cast<OMPExecutableDirective>(S);
10392
10393 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
10394 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
10395
10396 // Is this a target region that should not be emitted as an entry point? If
10397 // so just signal we are done with this target region.
10398 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
10399 return;
10400
10401 switch (E.getDirectiveKind()) {
10402 case OMPD_target:
10403 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10404 cast<OMPTargetDirective>(E));
10405 break;
10406 case OMPD_target_parallel:
10407 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10408 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10409 break;
10410 case OMPD_target_teams:
10411 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10412 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10413 break;
10414 case OMPD_target_teams_distribute:
10415 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10416 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10417 break;
10418 case OMPD_target_teams_distribute_simd:
10419 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10420 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10421 break;
10422 case OMPD_target_parallel_for:
10423 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10424 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10425 break;
10426 case OMPD_target_parallel_for_simd:
10427 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10428 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10429 break;
10430 case OMPD_target_simd:
10431 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10432 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10433 break;
10434 case OMPD_target_teams_distribute_parallel_for:
10435 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10436 CGM, ParentName,
10437 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10438 break;
10439 case OMPD_target_teams_distribute_parallel_for_simd:
10440 CodeGenFunction::
10441 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10442 CGM, ParentName,
10443 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10444 break;
10445 case OMPD_target_teams_loop:
10446 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
10447 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
10448 break;
10449 case OMPD_target_parallel_loop:
10450 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
10451 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
10452 break;
10453 case OMPD_parallel:
10454 case OMPD_for:
10455 case OMPD_parallel_for:
10456 case OMPD_parallel_master:
10457 case OMPD_parallel_sections:
10458 case OMPD_for_simd:
10459 case OMPD_parallel_for_simd:
10460 case OMPD_cancel:
10461 case OMPD_cancellation_point:
10462 case OMPD_ordered:
10463 case OMPD_threadprivate:
10464 case OMPD_allocate:
10465 case OMPD_task:
10466 case OMPD_simd:
10467 case OMPD_tile:
10468 case OMPD_unroll:
10469 case OMPD_sections:
10470 case OMPD_section:
10471 case OMPD_single:
10472 case OMPD_master:
10473 case OMPD_critical:
10474 case OMPD_taskyield:
10475 case OMPD_barrier:
10476 case OMPD_taskwait:
10477 case OMPD_taskgroup:
10478 case OMPD_atomic:
10479 case OMPD_flush:
10480 case OMPD_depobj:
10481 case OMPD_scan:
10482 case OMPD_teams:
10483 case OMPD_target_data:
10484 case OMPD_target_exit_data:
10485 case OMPD_target_enter_data:
10486 case OMPD_distribute:
10487 case OMPD_distribute_simd:
10488 case OMPD_distribute_parallel_for:
10489 case OMPD_distribute_parallel_for_simd:
10490 case OMPD_teams_distribute:
10491 case OMPD_teams_distribute_simd:
10492 case OMPD_teams_distribute_parallel_for:
10493 case OMPD_teams_distribute_parallel_for_simd:
10494 case OMPD_target_update:
10495 case OMPD_declare_simd:
10496 case OMPD_declare_variant:
10497 case OMPD_begin_declare_variant:
10498 case OMPD_end_declare_variant:
10499 case OMPD_declare_target:
10500 case OMPD_end_declare_target:
10501 case OMPD_declare_reduction:
10502 case OMPD_declare_mapper:
10503 case OMPD_taskloop:
10504 case OMPD_taskloop_simd:
10505 case OMPD_master_taskloop:
10506 case OMPD_master_taskloop_simd:
10507 case OMPD_parallel_master_taskloop:
10508 case OMPD_parallel_master_taskloop_simd:
10509 case OMPD_requires:
10510 case OMPD_metadirective:
10511 case OMPD_unknown:
10512 default:
10513 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10514 }
10515 return;
10516 }
10517
10518 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10519 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10520 return;
10521
10522 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10523 return;
10524 }
10525
10526 // If this is a lambda function, look into its body.
10527 if (const auto *L = dyn_cast<LambdaExpr>(S))
10528 S = L->getBody();
10529
10530 // Keep looking for target regions recursively.
10531 for (const Stmt *II : S->children())
10532 scanForTargetRegionsFunctions(II, ParentName);
10533}
10534
10535static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10536 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10537 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10538 if (!DevTy)
10539 return false;
10540 // Do not emit device_type(nohost) functions for the host.
10541 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10542 return true;
10543 // Do not emit device_type(host) functions for the device.
10544 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10545 return true;
10546 return false;
10547}
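// Illustrative example (not from the source): a function declared inside
//   #pragma omp declare target device_type(nohost)
// makes this return true during host codegen, so no host version is
// emitted; device_type(host) behaves symmetrically during device codegen.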
10548
10549bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10550 // If emitting code for the host, we do not process FD here. Instead we do
10551 // the normal code generation.
10552 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
10553 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10554 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10555 CGM.getLangOpts().OpenMPIsTargetDevice))
10556 return true;
10557 return false;
10558 }
10559
10560 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10561 // Try to detect target regions in the function.
10562 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10563 StringRef Name = CGM.getMangledName(GD);
10564 scanForTargetRegionsFunctions(FD->getBody(), Name);
10565 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10566 CGM.getLangOpts().OpenMPIsTargetDevice))
10567 return true;
10568 }
10569
10570 // Do not emit the function if it is not marked as declare target.
10571 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10572 AlreadyEmittedTargetDecls.count(VD) == 0;
10573}
10574
10575bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10576 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10577 CGM.getLangOpts().OpenMPIsTargetDevice))
10578 return true;
10579
10580 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
10581 return false;
10582
10583 // Check if there are Ctors/Dtors in this declaration and look for target
10584 // regions in it. We use the complete variant to produce the kernel name
10585 // mangling.
10586 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10587 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10588 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10589 StringRef ParentName =
10590 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10591 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10592 }
10593 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10594 StringRef ParentName =
10595 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10596 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10597 }
10598 }
10599
10600 // Do not emit the variable if it is not marked as declare target.
10601 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10602 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10603 cast<VarDecl>(GD.getDecl()));
10604 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10605 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10606 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10607 HasRequiresUnifiedSharedMemory)) {
10608 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10609 return true;
10610 }
10611 return false;
10612}
10613
10614void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10615 llvm::Constant *Addr) {
10616 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10617 !CGM.getLangOpts().OpenMPIsTargetDevice)
10618 return;
10619
10620 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10621 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10622
10623 // If this is an 'extern' declaration we defer to the canonical definition and
10624 // do not emit an offloading entry.
10625 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10626 VD->hasExternalStorage())
10627 return;
10628
10629 if (!Res) {
10630 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10631 // Register non-target variables being emitted in device code (debug info
10632 // may cause this).
10633 StringRef VarName = CGM.getMangledName(VD);
10634 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10635 }
10636 return;
10637 }
10638
10639 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10640 auto LinkageForVariable = [&VD, this]() {
10641 return CGM.getLLVMLinkageVarDefinition(VD);
10642 };
10643
10644 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10645 OMPBuilder.registerTargetGlobalVariable(
10646 convertCaptureClause(VD), convertDeviceClause(VD),
10647 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10648 VD->isExternallyVisible(),
10649 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10650 VD->getCanonicalDecl()->getBeginLoc()),
10651 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10652 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10653 CGM.getTypes().ConvertTypeForMem(
10654 CGM.getContext().getPointerType(VD->getType())),
10655 Addr);
10656
10657 for (auto *ref : GeneratedRefs)
10658 CGM.addCompilerUsedGlobal(ref);
10659}
10660
10661bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10662 if (isa<FunctionDecl>(GD.getDecl()) ||
10663 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10664 return emitTargetFunctions(GD);
10665
10666 return emitTargetGlobalVariable(GD);
10667}
10668
10669void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10670 for (const VarDecl *VD : DeferredGlobalVariables) {
10671 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10672 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10673 if (!Res)
10674 continue;
10675 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10676 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10677 !HasRequiresUnifiedSharedMemory) {
10678 CGM.EmitGlobal(VD);
10679 } else {
10680 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10681 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10682 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10683 HasRequiresUnifiedSharedMemory)) &&
10684 "Expected link clause or to clause with unified memory.");
10685 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10686 }
10687 }
10688}
10689
10690void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10691 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10692 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10693 " Expected target-based directive.");
10694}
10695
10696void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10697 for (const OMPClause *Clause : D->clauselists()) {
10698 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10699 HasRequiresUnifiedSharedMemory = true;
10700 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10701 } else if (const auto *AC =
10702 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10703 switch (AC->getAtomicDefaultMemOrderKind()) {
10704 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10705 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10706 break;
10707 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10708 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10709 break;
10710 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10711 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10712 break;
10713 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10714 break;
10715 }
10716 }
10717 }
10718}
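// Illustrative example (not from the source): a translation unit with
//   #pragma omp requires atomic_default_mem_order(acq_rel)
// sets RequiresAtomicOrdering to llvm::AtomicOrdering::AcquireRelease, which
// getDefaultMemoryOrdering() below reports so that atomic constructs without
// an explicit memory-order clause use acquire/release semantics.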
10719
10720llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10721 return RequiresAtomicOrdering;
10722}
10723
10724bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10725 LangAS &AS) {
10726 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10727 return false;
10728 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10729 switch(A->getAllocatorType()) {
10730 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10731 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10732 // Not supported, fall back to the default mem space.
10733 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10734 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10735 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10736 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10737 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10738 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10739 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10740 AS = LangAS::Default;
10741 return true;
10742 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10743 llvm_unreachable("Expected predefined allocator for the variables with the "
10744 "static storage.");
10745 }
10746 return false;
10747}
10748
10749bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10750 return HasRequiresUnifiedSharedMemory;
10751}
10752
10753CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10754 CodeGenModule &CGM)
10755 : CGM(CGM) {
10756 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10757 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10758 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10759 }
10760}
10761
10762CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10763 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10764 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10765}
10766
10767bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10768 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10769 return true;
10770
10771 const auto *D = cast<FunctionDecl>(GD.getDecl());
10772 // Do not emit the function if it is marked as declare target, as it was
10773 // already emitted.
10774 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10775 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10776 if (auto *F = dyn_cast_or_null<llvm::Function>(
10777 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10778 return !F->isDeclaration();
10779 return false;
10780 }
10781 return true;
10782 }
10783
10784 return !AlreadyEmittedTargetDecls.insert(D).second;
10785}
10786
10787void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10788 const OMPExecutableDirective &D,
10789 SourceLocation Loc,
10790 llvm::Function *OutlinedFn,
10791 ArrayRef<llvm::Value *> CapturedVars) {
10792 if (!CGF.HaveInsertPoint())
10793 return;
10794
10795 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10796 CodeGenFunction::RunCleanupsScope Scope(CGF);
10797
10798 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10799 llvm::Value *Args[] = {
10800 RTLoc,
10801 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10802 OutlinedFn};
10803 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10804 RealArgs.append(std::begin(Args), std::end(Args));
10805 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10806
10807 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10808 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10809 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10810}
10811
10812void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10813 const Expr *NumTeams,
10814 const Expr *ThreadLimit,
10815 SourceLocation Loc) {
10816 if (!CGF.HaveInsertPoint())
10817 return;
10818
10819 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10820
10821 llvm::Value *NumTeamsVal =
10822 NumTeams
10823 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10824 CGF.CGM.Int32Ty, /* isSigned = */ true)
10825 : CGF.Builder.getInt32(0);
10826
10827 llvm::Value *ThreadLimitVal =
10828 ThreadLimit
10829 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10830 CGF.CGM.Int32Ty, /* isSigned = */ true)
10831 : CGF.Builder.getInt32(0);
10832
10833 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10834 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10835 ThreadLimitVal};
10836 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10837 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10838 PushNumTeamsArgs);
10839}
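// Illustrative example (not from the source): for
//   #pragma omp teams num_teams(8) thread_limit(128)
// this emits, with both values cast to i32:
//   call void @__kmpc_push_num_teams(ptr @loc, i32 %gtid, i32 8, i32 128)
// ahead of the __kmpc_fork_teams call produced by emitTeamsCall above.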
10840
10841void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10842 const Expr *ThreadLimit,
10843 SourceLocation Loc) {
10844 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10845 llvm::Value *ThreadLimitVal =
10846 ThreadLimit
10847 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10848 CGF.CGM.Int32Ty, /* isSigned = */ true)
10849 : CGF.Builder.getInt32(0);
10850
10851 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10852 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10853 ThreadLimitVal};
10854 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10855 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10856 ThreadLimitArgs);
10857}
10858
10859void CGOpenMPRuntime::emitTargetDataCalls(
10860 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10861 const Expr *Device, const RegionCodeGenTy &CodeGen,
10862 CGOpenMPRuntime::TargetDataInfo &Info) {
10863 if (!CGF.HaveInsertPoint())
10864 return;
10865
10866 // Action used to replace the default codegen action and turn privatization
10867 // off.
10868 PrePostActionTy NoPrivAction;
10869
10870 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10871
10872 llvm::Value *IfCondVal = nullptr;
10873 if (IfCond)
10874 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10875
10876 // Emit device ID if any.
10877 llvm::Value *DeviceID = nullptr;
10878 if (Device) {
10879 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10880 CGF.Int64Ty, /*isSigned=*/true);
10881 } else {
10882 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10883 }
10884
10885 // Fill up the arrays with all the mapped variables.
10886 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10887 auto GenMapInfoCB =
10888 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10889 CGF.Builder.restoreIP(CodeGenIP);
10890 // Get map clause information.
10891 MappableExprsHandler MEHandler(D, CGF);
10892 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10893
10894 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10895 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10896 };
10897 if (CGM.getCodeGenOpts().getDebugInfo() !=
10898 llvm::codegenoptions::NoDebugInfo) {
10899 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10900 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10901 FillInfoMap);
10902 }
10903
10904 return CombinedInfo;
10905 };
10906 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10907 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10908 CGF.Builder.restoreIP(CodeGenIP);
10909 switch (BodyGenType) {
10910 case BodyGenTy::Priv:
10911 if (!Info.CaptureDeviceAddrMap.empty())
10912 CodeGen(CGF);
10913 break;
10914 case BodyGenTy::DupNoPriv:
10915 if (!Info.CaptureDeviceAddrMap.empty()) {
10916 CodeGen.setAction(NoPrivAction);
10917 CodeGen(CGF);
10918 }
10919 break;
10920 case BodyGenTy::NoPriv:
10921 if (Info.CaptureDeviceAddrMap.empty()) {
10922 CodeGen.setAction(NoPrivAction);
10923 CodeGen(CGF);
10924 }
10925 break;
10926 }
10927 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10928 CGF.Builder.GetInsertPoint());
10929 };
10930
10931 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10932 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10933 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10934 }
10935 };
10936
10937 auto CustomMapperCB = [&](unsigned int I) {
10938 llvm::Function *MFunc = nullptr;
10939 if (CombinedInfo.Mappers[I]) {
10940 Info.HasMapper = true;
10941 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10942 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10943 }
10944 return MFunc;
10945 };
10946
10947 // Source location for the ident struct
10948 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10949
10950 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10951 CGF.AllocaInsertPt->getIterator());
10952 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10953 CGF.Builder.GetInsertPoint());
10954 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10955 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10956 cantFail(OMPBuilder.createTargetData(
10957 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10958 CustomMapperCB,
10959 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
10960 CGF.Builder.restoreIP(AfterIP);
10961}
10962
10963void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10964 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10965 const Expr *Device) {
10966 if (!CGF.HaveInsertPoint())
10967 return;
10968
10969 assert((isa<OMPTargetEnterDataDirective>(D) ||
10970 isa<OMPTargetExitDataDirective>(D) ||
10971 isa<OMPTargetUpdateDirective>(D)) &&
10972 "Expecting either target enter, exit data, or update directives.");
10973
10974 CodeGenFunction::OMPTargetDataInfo InputInfo;
10975 llvm::Value *MapTypesArray = nullptr;
10976 llvm::Value *MapNamesArray = nullptr;
10977 // Generate the code for the opening of the data environment.
10978 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10979 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10980 // Emit device ID if any.
10981 llvm::Value *DeviceID = nullptr;
10982 if (Device) {
10983 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10984 CGF.Int64Ty, /*isSigned=*/true);
10985 } else {
10986 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10987 }
10988
10989 // Emit the number of elements in the offloading arrays.
10990 llvm::Constant *PointerNum =
10991 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10992
10993 // Source location for the ident struct
10994 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10995
10996 SmallVector<llvm::Value *, 13> OffloadingArgs(
10997 {RTLoc, DeviceID, PointerNum,
10998 InputInfo.BasePointersArray.emitRawPointer(CGF),
10999 InputInfo.PointersArray.emitRawPointer(CGF),
11000 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
11001 InputInfo.MappersArray.emitRawPointer(CGF)});
11002
11003 // Select the right runtime function call for each standalone
11004 // directive.
11005 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11006 RuntimeFunction RTLFn;
11007 switch (D.getDirectiveKind()) {
11008 case OMPD_target_enter_data:
11009 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11010 : OMPRTL___tgt_target_data_begin_mapper;
11011 break;
11012 case OMPD_target_exit_data:
11013 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11014 : OMPRTL___tgt_target_data_end_mapper;
11015 break;
11016 case OMPD_target_update:
11017 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11018 : OMPRTL___tgt_target_data_update_mapper;
11019 break;
11020 case OMPD_parallel:
11021 case OMPD_for:
11022 case OMPD_parallel_for:
11023 case OMPD_parallel_master:
11024 case OMPD_parallel_sections:
11025 case OMPD_for_simd:
11026 case OMPD_parallel_for_simd:
11027 case OMPD_cancel:
11028 case OMPD_cancellation_point:
11029 case OMPD_ordered:
11030 case OMPD_threadprivate:
11031 case OMPD_allocate:
11032 case OMPD_task:
11033 case OMPD_simd:
11034 case OMPD_tile:
11035 case OMPD_unroll:
11036 case OMPD_sections:
11037 case OMPD_section:
11038 case OMPD_single:
11039 case OMPD_master:
11040 case OMPD_critical:
11041 case OMPD_taskyield:
11042 case OMPD_barrier:
11043 case OMPD_taskwait:
11044 case OMPD_taskgroup:
11045 case OMPD_atomic:
11046 case OMPD_flush:
11047 case OMPD_depobj:
11048 case OMPD_scan:
11049 case OMPD_teams:
11050 case OMPD_target_data:
11051 case OMPD_distribute:
11052 case OMPD_distribute_simd:
11053 case OMPD_distribute_parallel_for:
11054 case OMPD_distribute_parallel_for_simd:
11055 case OMPD_teams_distribute:
11056 case OMPD_teams_distribute_simd:
11057 case OMPD_teams_distribute_parallel_for:
11058 case OMPD_teams_distribute_parallel_for_simd:
11059 case OMPD_declare_simd:
11060 case OMPD_declare_variant:
11061 case OMPD_begin_declare_variant:
11062 case OMPD_end_declare_variant:
11063 case OMPD_declare_target:
11064 case OMPD_end_declare_target:
11065 case OMPD_declare_reduction:
11066 case OMPD_declare_mapper:
11067 case OMPD_taskloop:
11068 case OMPD_taskloop_simd:
11069 case OMPD_master_taskloop:
11070 case OMPD_master_taskloop_simd:
11071 case OMPD_parallel_master_taskloop:
11072 case OMPD_parallel_master_taskloop_simd:
11073 case OMPD_target:
11074 case OMPD_target_simd:
11075 case OMPD_target_teams_distribute:
11076 case OMPD_target_teams_distribute_simd:
11077 case OMPD_target_teams_distribute_parallel_for:
11078 case OMPD_target_teams_distribute_parallel_for_simd:
11079 case OMPD_target_teams:
11080 case OMPD_target_parallel:
11081 case OMPD_target_parallel_for:
11082 case OMPD_target_parallel_for_simd:
11083 case OMPD_requires:
11084 case OMPD_metadirective:
11085 case OMPD_unknown:
11086 default:
11087 llvm_unreachable("Unexpected standalone target data directive.");
11088 break;
11089 }
11090 if (HasNowait) {
11091 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11092 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11093 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
11094 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
11095 }
11096 CGF.EmitRuntimeCall(
11097 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11098 OffloadingArgs);
11099 };
11100
11101 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11102 &MapNamesArray](CodeGenFunction &CGF,
11103 PrePostActionTy &) {
11104 // Fill up the arrays with all the mapped variables.
11105 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11106 llvm::OpenMPIRBuilder::TargetDataInfo Info;
11107 MappableExprsHandler MEHandler(D, CGF);
11108 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
11109 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
11110 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
11111
11112 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11113 D.hasClausesOfKind<OMPNowaitClause>();
11114
11115 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11116 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
11117 CGF.VoidPtrTy, CGM.getPointerAlign());
11118 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
11119 CGM.getPointerAlign());
11120 InputInfo.SizesArray =
11121 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11122 InputInfo.MappersArray =
11123 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11124 MapTypesArray = Info.RTArgs.MapTypesArray;
11125 MapNamesArray = Info.RTArgs.MapNamesArray;
11126 if (RequiresOuterTask)
11127 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11128 else
11129 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11130 };
11131
11132 if (IfCond) {
11133 emitIfClause(CGF, IfCond, TargetThenGen,
11134 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11135 } else {
11136 RegionCodeGenTy ThenRCG(TargetThenGen);
11137 ThenRCG(CGF);
11138 }
11139}
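// Illustrative example (not from the source): for
//   #pragma omp target enter data map(to: a[0:n]) nowait
// the switch above selects __tgt_target_data_begin_nowait_mapper and the
// four null trailing arguments are appended for the task-dependence slots;
// without 'nowait' the blocking __tgt_target_data_begin_mapper is called.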
11140
11141namespace {
11142 /// Kind of parameter in a function with 'declare simd' directive.
11143enum ParamKindTy {
11144 Linear,
11145 LinearRef,
11146 LinearUVal,
11147 LinearVal,
11148 Uniform,
11149 Vector,
11150};
11151/// Attribute set of the parameter.
11152struct ParamAttrTy {
11153 ParamKindTy Kind = Vector;
11154 llvm::APSInt StrideOrArg;
11155 llvm::APSInt Alignment;
11156 bool HasVarStride = false;
11157};
11158} // namespace
11159
11160static unsigned evaluateCDTSize(const FunctionDecl *FD,
11161 ArrayRef<ParamAttrTy> ParamAttrs) {
11162 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11163 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
11164 // argument of that clause. The VLEN value must be a power of 2.
11165 // Otherwise the notion of the function's "characteristic data type" (CDT)
11166 // is used to compute the vector length.
11167 // The CDT is defined in the following order:
11168 //   a) For a non-void function, the CDT is the return type.
11169 //   b) If the function has any non-uniform, non-linear parameters, then the
11170 //   CDT is the type of the first such parameter.
11171 //   c) If the CDT determined by a) or b) above is a struct, union, or class
11172 //   type that is passed by value (except for the type that maps to the
11173 //   built-in complex data type), the characteristic data type is int.
11174 //   d) If none of the above three cases is applicable, the CDT is int.
11175 // The VLEN is then determined based on the CDT and the vector register size
11176 // of the ISA for which the current vector version is generated. The VLEN is
11177 // computed using the formula below:
11178 //   VLEN = sizeof(vector_register) / sizeof(CDT),
11179 // where the vector register size is specified in section 3.2.1 Registers and
11180 // the Stack Frame of the original AMD64 ABI document.
11181 QualType RetType = FD->getReturnType();
11182 if (RetType.isNull())
11183 return 0;
11184 ASTContext &C = FD->getASTContext();
11185 QualType CDT;
11186 if (!RetType.isNull() && !RetType->isVoidType()) {
11187 CDT = RetType;
11188 } else {
11189 unsigned Offset = 0;
11190 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11191 if (ParamAttrs[Offset].Kind == Vector)
11192 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
11193 ++Offset;
11194 }
11195 if (CDT.isNull()) {
11196 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11197 if (ParamAttrs[I + Offset].Kind == Vector) {
11198 CDT = FD->getParamDecl(I)->getType();
11199 break;
11200 }
11201 }
11202 }
11203 }
11204 if (CDT.isNull())
11205 CDT = C.IntTy;
11206 CDT = CDT->getCanonicalTypeUnqualified();
11207 if (CDT->isRecordType() || CDT->isUnionType())
11208 CDT = C.IntTy;
11209 return C.getTypeSize(CDT);
11210}
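// Worked example (illustrative): for 'double foo(float x)' with no simdlen
// clause, the CDT is the return type 'double' (case a), so for a 128-bit
// vector register VLEN = 128 / 64 = 2 lanes.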
11211
11212/// Mangle the parameter part of the vector function name according to
11213/// their OpenMP classification. The mangling function is defined in
11214/// section 4.5 of the AAVFABI(2021Q1).
11215static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11216 SmallString<256> Buffer;
11217 llvm::raw_svector_ostream Out(Buffer);
11218 for (const auto &ParamAttr : ParamAttrs) {
11219 switch (ParamAttr.Kind) {
11220 case Linear:
11221 Out << 'l';
11222 break;
11223 case LinearRef:
11224 Out << 'R';
11225 break;
11226 case LinearUVal:
11227 Out << 'U';
11228 break;
11229 case LinearVal:
11230 Out << 'L';
11231 break;
11232 case Uniform:
11233 Out << 'u';
11234 break;
11235 case Vector:
11236 Out << 'v';
11237 break;
11238 }
11239 if (ParamAttr.HasVarStride)
11240 Out << "s" << ParamAttr.StrideOrArg;
11241 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11242 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11243 // Don't print the step value if it is not present or if it is
11244 // equal to 1.
11245 if (ParamAttr.StrideOrArg < 0)
11246 Out << 'n' << -ParamAttr.StrideOrArg;
11247 else if (ParamAttr.StrideOrArg != 1)
11248 Out << ParamAttr.StrideOrArg;
11249 }
11250
11251 if (!!ParamAttr.Alignment)
11252 Out << 'a' << ParamAttr.Alignment;
11253 }
11254
11255 return std::string(Out.str());
11256}
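// Illustrative example (not from the source): parameters classified as
// (Uniform, Linear with step 2, Vector) mangle to "ul2v"; a unit step is
// omitted ("l"), step -4 becomes "ln4", a variable step held in parameter 3
// becomes "ls3", and an aligned(16) parameter appends "a16".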
11257
11258static void
11259emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11260 const llvm::APSInt &VLENVal,
11261 ArrayRef<ParamAttrTy> ParamAttrs,
11262 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11263 struct ISADataTy {
11264 char ISA;
11265 unsigned VecRegSize;
11266 };
11267 ISADataTy ISAData[] = {
11268 {
11269 'b', 128
11270 }, // SSE
11271 {
11272 'c', 256
11273 }, // AVX
11274 {
11275 'd', 256
11276 }, // AVX2
11277 {
11278 'e', 512
11279 }, // AVX512
11280 };
11281 llvm::SmallVector<char, 2> Masked;
11282 switch (State) {
11283 case OMPDeclareSimdDeclAttr::BS_Undefined:
11284 Masked.push_back('N');
11285 Masked.push_back('M');
11286 break;
11287 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11288 Masked.push_back('N');
11289 break;
11290 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11291 Masked.push_back('M');
11292 break;
11293 }
11294 for (char Mask : Masked) {
11295 for (const ISADataTy &Data : ISAData) {
11296 SmallString<256> Buffer;
11297 llvm::raw_svector_ostream Out(Buffer);
11298 Out << "_ZGV" << Data.ISA << Mask;
11299 if (!VLENVal) {
11300 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11301 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11302 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11303 } else {
11304 Out << VLENVal;
11305 }
11306 Out << mangleVectorParameters(ParamAttrs);
11307 Out << '_' << Fn->getName();
11308 Fn->addFnAttr(Out.str());
11309 }
11310 }
11311}
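// Illustrative example (not from the source): for
//   #pragma omp declare simd simdlen(8) notinbranch
// on a function named "foo" with one vector parameter, this adds the
// attributes "_ZGVbN8v_foo" (SSE), "_ZGVcN8v_foo" (AVX), "_ZGVdN8v_foo"
// (AVX2), and "_ZGVeN8v_foo" (AVX512); 'inbranch' selects 'M' instead of
// 'N', and an unspecified branch state emits both.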
11312
11313// These are the functions needed to mangle the names of the
11314// vector functions generated by the compiler, according to the rules
11315// defined in the "Vector Function ABI specifications for AArch64",
11316// available at
11317// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11318
11319/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11320static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11321 QT = QT.getCanonicalType();
11322
11323 if (QT->isVoidType())
11324 return false;
11325
11326 if (Kind == ParamKindTy::Uniform)
11327 return false;
11328
11329 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
11330 return false;
11331
11332 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11333 !QT->isReferenceType())
11334 return false;
11335
11336 return true;
11337}
11338
11339/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11340static bool getAArch64PBV(QualType QT, ASTContext &C) {
11341 QT = QT.getCanonicalType();
11342 unsigned Size = C.getTypeSize(QT);
11343
11344 // Only scalars and complex types within 16 bytes wide set PBV to true.
11345 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11346 return false;
11347
11348 if (QT->isFloatingType())
11349 return true;
11350
11351 if (QT->isIntegerType())
11352 return true;
11353
11354 if (QT->isPointerType())
11355 return true;
11356
11357 // TODO: Add support for complex types (section 3.1.2, item 2).
11358
11359 return false;
11360}
11361
11362/// Computes the lane size (LS) of a return type or of an input parameter,
11363/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11364/// TODO: Add support for references, section 3.2.1, item 1.
11365static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11366 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11367 QualType PTy = QT.getCanonicalType()->getPointeeType();
11368 if (getAArch64PBV(PTy, C))
11369 return C.getTypeSize(PTy);
11370 }
11371 if (getAArch64PBV(QT, C))
11372 return C.getTypeSize(QT);
11373
11374 return C.getTypeSize(C.getUIntPtrType());
11375}
11376
11377// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11378// signature of the scalar function, as defined in 3.2.2 of the
11379// AAVFABI.
11380static std::tuple<unsigned, unsigned, bool>
11381getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11382 QualType RetType = FD->getReturnType().getCanonicalType();
11383
11384 ASTContext &C = FD->getASTContext();
11385
11386 bool OutputBecomesInput = false;
11387
11388 llvm::SmallVector<unsigned, 8> Sizes;
11389 if (!RetType->isVoidType()) {
11390 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11391 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11392 OutputBecomesInput = true;
11393 }
11394 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11395 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11396 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11397 }
11398
11399 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11400 // The LS of a function parameter / return value can only be a power
11401 // of 2, starting from 8 bits, up to 128.
11402 assert(llvm::all_of(Sizes,
11403 [](unsigned Size) {
11404 return Size == 8 || Size == 16 || Size == 32 ||
11405 Size == 64 || Size == 128;
11406 }) &&
11407 "Invalid size");
11408
11409 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
11410 OutputBecomesInput);
11411}
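// Worked example (illustrative): for 'double foo(float x, double y)' with
// both parameters classified as Vector, the lane sizes are {64, 32, 64}, so
// NDS = 32 and WDS = 64; OutputBecomesInput stays false because the double
// return type is PBV.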
11412
11413// Function used to add the attribute. The parameter `VLEN` is
11414// templated to allow the use of "x" when targeting scalable functions
11415// for SVE.
11416template <typename T>
11417static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11418 char ISA, StringRef ParSeq,
11419 StringRef MangledName, bool OutputBecomesInput,
11420 llvm::Function *Fn) {
11421 SmallString<256> Buffer;
11422 llvm::raw_svector_ostream Out(Buffer);
11423 Out << Prefix << ISA << LMask << VLEN;
11424 if (OutputBecomesInput)
11425 Out << "v";
11426 Out << ParSeq << "_" << MangledName;
11427 Fn->addFnAttr(Out.str());
11428}
11429
11430// Helper function to generate the Advanced SIMD names depending on
11431// the value of the NDS when simdlen is not present.
11432static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11433 StringRef Prefix, char ISA,
11434 StringRef ParSeq, StringRef MangledName,
11435 bool OutputBecomesInput,
11436 llvm::Function *Fn) {
11437 switch (NDS) {
11438 case 8:
11439 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11440 OutputBecomesInput, Fn);
11441 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11442 OutputBecomesInput, Fn);
11443 break;
11444 case 16:
11445 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11446 OutputBecomesInput, Fn);
11447 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11448 OutputBecomesInput, Fn);
11449 break;
11450 case 32:
11451 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11452 OutputBecomesInput, Fn);
11453 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11454 OutputBecomesInput, Fn);
11455 break;
11456 case 64:
11457 case 128:
11458 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11459 OutputBecomesInput, Fn);
11460 break;
11461 default:
11462 llvm_unreachable("Scalar type is too wide.");
11463 }
11464}
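// Illustrative example (not from the source): with NDS = 32 and mask "N",
// this adds a 2-lane (64-bit) and a 4-lane (128-bit) Advanced SIMD variant,
// i.e. attribute names beginning "_ZGVnN2" and "_ZGVnN4".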
11465
11466/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11467static void emitAArch64DeclareSimdFunction(
11468 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11469 ArrayRef<ParamAttrTy> ParamAttrs,
11470 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11471 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11472
11473 // Get basic data for building the vector signature.
11474 const auto Data = getNDSWDS(FD, ParamAttrs);
11475 const unsigned NDS = std::get<0>(Data);
11476 const unsigned WDS = std::get<1>(Data);
11477 const bool OutputBecomesInput = std::get<2>(Data);
11478
11479 // Check the values provided via `simdlen` by the user.
11480 // 1. A `simdlen(1)` doesn't produce vector signatures.
11481 if (UserVLEN == 1) {
11482 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11483 DiagnosticsEngine::Warning,
11484 "The clause simdlen(1) has no effect when targeting aarch64.");
11485 CGM.getDiags().Report(SLoc, DiagID);
11486 return;
11487 }
11488
11489 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11490 // Advanced SIMD output.
11491 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11492 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11493 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11494 "power of 2 when targeting Advanced SIMD.");
11495 CGM.getDiags().Report(SLoc, DiagID);
11496 return;
11497 }
11498
11499 // 3. Section 3.4.1: SVE fixed length must obey the architectural
11500 // limits.
11501 if (ISA == 's' && UserVLEN != 0) {
11502 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11503 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11504 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11505 "lanes in the architectural constraints "
11506 "for SVE (min is 128-bit, max is "
11507 "2048-bit, by steps of 128-bit)");
11508 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11509 return;
11510 }
11511 }
11512
11513 // Sort out parameter sequence.
11514 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11515 StringRef Prefix = "_ZGV";
11516 // Generate simdlen from user input (if any).
11517 if (UserVLEN) {
11518 if (ISA == 's') {
11519 // SVE generates only a masked function.
11520 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11521 OutputBecomesInput, Fn);
11522 } else {
11523 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11524 // Advanced SIMD generates one or two functions, depending on
11525 // the `[not]inbranch` clause.
11526 switch (State) {
11527 case OMPDeclareSimdDeclAttr::BS_Undefined:
11528 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11529 OutputBecomesInput, Fn);
11530 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11531 OutputBecomesInput, Fn);
11532 break;
11533 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11534 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11535 OutputBecomesInput, Fn);
11536 break;
11537 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11538 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11539 OutputBecomesInput, Fn);
11540 break;
11541 }
11542 }
11543 } else {
11544 // If no user simdlen is provided, follow the AAVFABI rules for
11545 // generating the vector length.
11546 if (ISA == 's') {
11547 // SVE, section 3.4.1, item 1.
11548 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11549 OutputBecomesInput, Fn);
11550 } else {
11551 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11552 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11553 // two vector names depending on the use of the clause
11554 // `[not]inbranch`.
11555 switch (State) {
11556 case OMPDeclareSimdDeclAttr::BS_Undefined:
11557 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11558 OutputBecomesInput, Fn);
11559 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11560 OutputBecomesInput, Fn);
11561 break;
11562 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11563 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11564 OutputBecomesInput, Fn);
11565 break;
11566 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11567 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11568 OutputBecomesInput, Fn);
11569 break;
11570 }
11571 }
11572 }
11573}
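// Illustrative example (not from the source): with no user simdlen, SVE
// ('s') yields a single masked, length-agnostic variant such as
// "_ZGVsMxv_foo", while Advanced SIMD ('n') yields fixed-length variants
// chosen from the NDS via addAArch64AdvSIMDNDSNames above.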
11574
11575void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11576 llvm::Function *Fn) {
11577 ASTContext &C = CGM.getContext();
11578 FD = FD->getMostRecentDecl();
11579 while (FD) {
11580 // Map params to their positions in function decl.
11581 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11582 if (isa<CXXMethodDecl>(FD))
11583 ParamPositions.try_emplace(FD, 0);
11584 unsigned ParamPos = ParamPositions.size();
11585 for (const ParmVarDecl *P : FD->parameters()) {
11586 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11587 ++ParamPos;
11588 }
11589 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11590 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11591 // Mark uniform parameters.
11592 for (const Expr *E : Attr->uniforms()) {
11593 E = E->IgnoreParenImpCasts();
11594 unsigned Pos;
11595 if (isa<CXXThisExpr>(E)) {
11596 Pos = ParamPositions[FD];
11597 } else {
11598 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11599 ->getCanonicalDecl();
11600 auto It = ParamPositions.find(PVD);
11601 assert(It != ParamPositions.end() && "Function parameter not found");
11602 Pos = It->second;
11603 }
11604 ParamAttrs[Pos].Kind = Uniform;
11605 }
11606 // Get alignment info.
11607 auto *NI = Attr->alignments_begin();
11608 for (const Expr *E : Attr->aligneds()) {
11609 E = E->IgnoreParenImpCasts();
11610 unsigned Pos;
11611 QualType ParmTy;
11612 if (isa<CXXThisExpr>(E)) {
11613 Pos = ParamPositions[FD];
11614 ParmTy = E->getType();
11615 } else {
11616 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11617 ->getCanonicalDecl();
11618 auto It = ParamPositions.find(PVD);
11619 assert(It != ParamPositions.end() && "Function parameter not found");
11620 Pos = It->second;
11621 ParmTy = PVD->getType();
11622 }
11623 ParamAttrs[Pos].Alignment =
11624 (*NI)
11625 ? (*NI)->EvaluateKnownConstInt(C)
11626 : llvm::APSInt::getUnsigned(
11627 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11628 .getQuantity());
11629 ++NI;
11630 }
11631 // Mark linear parameters.
11632 auto *SI = Attr->steps_begin();
11633 auto *MI = Attr->modifiers_begin();
11634 for (const Expr *E : Attr->linears()) {
11635 E = E->IgnoreParenImpCasts();
11636 unsigned Pos;
11637 bool IsReferenceType = false;
11638 // Rescaling factor needed to compute the linear parameter
11639 // value in the mangled name.
11640 unsigned PtrRescalingFactor = 1;
11641 if (isa<CXXThisExpr>(E)) {
11642 Pos = ParamPositions[FD];
11643 auto *P = cast<PointerType>(E->getType());
11644 PtrRescalingFactor = CGM.getContext()
11645 .getTypeSizeInChars(P->getPointeeType())
11646 .getQuantity();
11647 } else {
11648 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11649 ->getCanonicalDecl();
11650 auto It = ParamPositions.find(PVD);
11651 assert(It != ParamPositions.end() && "Function parameter not found");
11652 Pos = It->second;
11653 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11654 PtrRescalingFactor = CGM.getContext()
11655 .getTypeSizeInChars(P->getPointeeType())
11656 .getQuantity();
11657 else if (PVD->getType()->isReferenceType()) {
11658 IsReferenceType = true;
11659 PtrRescalingFactor =
11660 CGM.getContext()
11661 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11662 .getQuantity();
11663 }
11664 }
11665 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11666 if (*MI == OMPC_LINEAR_ref)
11667 ParamAttr.Kind = LinearRef;
11668 else if (*MI == OMPC_LINEAR_uval)
11669 ParamAttr.Kind = LinearUVal;
11670 else if (IsReferenceType)
11671 ParamAttr.Kind = LinearVal;
11672 else
11673 ParamAttr.Kind = Linear;
11674 // Assuming a stride of 1, for `linear` without modifiers.
11675 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11676 if (*SI) {
11677 Expr::EvalResult Result;
11678 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11679 if (const auto *DRE =
11680 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11681 if (const auto *StridePVD =
11682 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11683 ParamAttr.HasVarStride = true;
11684 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11685 assert(It != ParamPositions.end() &&
11686 "Function parameter not found");
11687 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11688 }
11689 }
11690 } else {
11691 ParamAttr.StrideOrArg = Result.Val.getInt();
11692 }
11693 }
11694 // If we are using a linear clause on a pointer, we need to
11695 // rescale the value of linear_step with the byte size of the
11696 // pointee type.
11697 if (!ParamAttr.HasVarStride &&
11698 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11699 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11700 ++SI;
11701 ++MI;
11702 }
11703 llvm::APSInt VLENVal;
11704 SourceLocation ExprLoc;
11705 const Expr *VLENExpr = Attr->getSimdlen();
11706 if (VLENExpr) {
11707 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11708 ExprLoc = VLENExpr->getExprLoc();
11709 }
11710 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11711 if (CGM.getTriple().isX86()) {
11712 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11713 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11714 unsigned VLEN = VLENVal.getExtValue();
11715 StringRef MangledName = Fn->getName();
11716 if (CGM.getTarget().hasFeature("sve"))
11717 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11718 MangledName, 's', 128, Fn, ExprLoc);
11719 else if (CGM.getTarget().hasFeature("neon"))
11720 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11721 MangledName, 'n', 128, Fn, ExprLoc);
11722 }
11723 }
11724 FD = FD->getPreviousDecl();
11725 }
11726}
11727
11728namespace {
11729/// Cleanup action for doacross support.
11730class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11731public:
11732 static const int DoacrossFinArgs = 2;
11733
11734private:
11735 llvm::FunctionCallee RTLFn;
11736 llvm::Value *Args[DoacrossFinArgs];
11737
11738public:
11739 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11740 ArrayRef<llvm::Value *> CallArgs)
11741 : RTLFn(RTLFn) {
11742 assert(CallArgs.size() == DoacrossFinArgs);
11743 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11744 }
11745 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11746 if (!CGF.HaveInsertPoint())
11747 return;
11748 CGF.EmitRuntimeCall(RTLFn, Args);
11749 }
11750};
11751} // namespace
11752
11753void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11754 const OMPLoopDirective &D,
11755 ArrayRef<Expr *> NumIterations) {
11756 if (!CGF.HaveInsertPoint())
11757 return;
11758
11759 ASTContext &C = CGM.getContext();
11760 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11761 RecordDecl *RD;
11762 if (KmpDimTy.isNull()) {
11763 // Build struct kmp_dim { // loop bounds info cast to kmp_int64
11764 // kmp_int64 lo; // lower
11765 // kmp_int64 up; // upper
11766 // kmp_int64 st; // stride
11767 // };
11768 RD = C.buildImplicitRecord("kmp_dim");
11769 RD->startDefinition();
11770 addFieldToRecordDecl(C, RD, Int64Ty);
11771 addFieldToRecordDecl(C, RD, Int64Ty);
11772 addFieldToRecordDecl(C, RD, Int64Ty);
11773 RD->completeDefinition();
11774 KmpDimTy = C.getCanonicalTagType(RD);
11775 } else {
11776 RD = KmpDimTy->castAsRecordDecl();
11777 }
11778 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11779 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11780 ArraySizeModifier::Normal, 0);
11781
11782 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11783 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11784 enum { LowerFD = 0, UpperFD, StrideFD };
11785 // Fill dims with data.
11786 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11787 LValue DimsLVal = CGF.MakeAddrLValue(
11788 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11789 // dims.upper = num_iterations;
11790 LValue UpperLVal = CGF.EmitLValueForField(
11791 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11792 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11793 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11794 Int64Ty, NumIterations[I]->getExprLoc());
11795 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11796 // dims.stride = 1;
11797 LValue StrideLVal = CGF.EmitLValueForField(
11798 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11799 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11800 StrideLVal);
11801 }
11802
11803 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11804 // kmp_int32 num_dims, struct kmp_dim * dims);
11805 llvm::Value *Args[] = {
11806 emitUpdateLocation(CGF, D.getBeginLoc()),
11807 getThreadID(CGF, D.getBeginLoc()),
11808 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11809 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11810 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11811 CGM.VoidPtrTy)};
11812
11813 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11814 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11815 CGF.EmitRuntimeCall(RTLFn, Args);
11816 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11817 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11818 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11819 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11820 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11821 llvm::ArrayRef(FiniArgs));
11822}
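// Illustrative example (not from the source): for
//   #pragma omp for ordered(2)
// a two-element kmp_dim array is built with lo = 0 (from the null
// initialization), up = each loop's iteration count, and st = 1, then passed
// to __kmpc_doacross_init(&loc, gtid, /*num_dims=*/2, dims); the pushed
// cleanup emits the matching __kmpc_doacross_fini on region exit.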
11823
11824template <typename T>
11825static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11826 const T *C, llvm::Value *ULoc,
11827 llvm::Value *ThreadID) {
11828 QualType Int64Ty =
11829 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11830 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11831 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11832 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11833 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11834 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11835 const Expr *CounterVal = C->getLoopData(I);
11836 assert(CounterVal);
11837 llvm::Value *CntVal = CGF.EmitScalarConversion(
11838 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11839 CounterVal->getExprLoc());
11840 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11841 /*Volatile=*/false, Int64Ty);
11842 }
11843 llvm::Value *Args[] = {
11844 ULoc, ThreadID,
11845 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11846 llvm::FunctionCallee RTLFn;
11847 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11848 OMPDoacrossKind<T> ODK;
11849 if (ODK.isSource(C)) {
11850 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11851 OMPRTL___kmpc_doacross_post);
11852 } else {
11853 assert(ODK.isSink(C) && "Expect sink modifier.");
11854 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11855 OMPRTL___kmpc_doacross_wait);
11856 }
11857 CGF.EmitRuntimeCall(RTLFn, Args);
11858}
11859
11860 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11861 const OMPDependClause *C) {
11862 return EmitDoacrossOrdered<OMPDependClause>(
11863 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11864 getThreadID(CGF, C->getBeginLoc()));
11865}
11866
11867 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11868 const OMPDoacrossClause *C) {
11869 return EmitDoacrossOrdered<OMPDoacrossClause>(
11870 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11871 getThreadID(CGF, C->getBeginLoc()));
11872}
11873
11874 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11875 llvm::FunctionCallee Callee,
11876 ArrayRef<llvm::Value *> Args) const {
11877 assert(Loc.isValid() && "Outlined function call location must be valid.");
11878 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11879
11880 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11881 if (Fn->doesNotThrow()) {
11882 CGF.EmitNounwindRuntimeCall(Fn, Args);
11883 return;
11884 }
11885 }
11886 CGF.EmitRuntimeCall(Callee, Args);
11887}
11888
11889 void CGOpenMPRuntime::emitOutlinedFunctionCall(
11890 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11891 ArrayRef<llvm::Value *> Args) const {
11892 emitCall(CGF, Loc, OutlinedFn, Args);
11893}
11894
11895 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11896 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11897 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11898 HasEmittedDeclareTargetRegion = true;
11899}
11900
11901 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11902 const VarDecl *NativeParam,
11903 const VarDecl *TargetParam) const {
11904 return CGF.GetAddrOfLocalVar(NativeParam);
11905}
11906
11907/// Return allocator value from expression, or return a null allocator (default
11908/// when no allocator specified).
11909static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11910 const Expr *Allocator) {
11911 llvm::Value *AllocVal;
11912 if (Allocator) {
11913 AllocVal = CGF.EmitScalarExpr(Allocator);
11914 // According to the standard, the original allocator type is an enum
11915 // (integer). Convert it to a pointer type, if required.
11916 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11917 CGF.getContext().VoidPtrTy,
11918 Allocator->getExprLoc());
11919 } else {
11920 // If no allocator specified, it defaults to the null allocator.
11921 AllocVal = llvm::Constant::getNullValue(
11922 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11923 }
11924 return AllocVal;
11925}
11926
11927/// Return the alignment from an allocate directive if present.
11928static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11929 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11930
11931 if (!AllocateAlignment)
11932 return nullptr;
11933
11934 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11935}
11936
11937 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11938 const VarDecl *VD) {
11939 if (!VD)
11940 return Address::invalid();
11941 Address UntiedAddr = Address::invalid();
11942 Address UntiedRealAddr = Address::invalid();
11943 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11944 if (It != FunctionToUntiedTaskStackMap.end()) {
11945 const UntiedLocalVarsAddressesMap &UntiedData =
11946 UntiedLocalVarsStack[It->second];
11947 auto I = UntiedData.find(VD);
11948 if (I != UntiedData.end()) {
11949 UntiedAddr = I->second.first;
11950 UntiedRealAddr = I->second.second;
11951 }
11952 }
11953 const VarDecl *CVD = VD->getCanonicalDecl();
11954 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11955 // Use the default allocation.
11956 if (!isAllocatableDecl(VD))
11957 return UntiedAddr;
11958 llvm::Value *Size;
11959 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11960 if (CVD->getType()->isVariablyModifiedType()) {
11961 Size = CGF.getTypeSize(CVD->getType());
11962 // Align the size: ((size + align - 1) / align) * align
11963 Size = CGF.Builder.CreateNUWAdd(
11964 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11965 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11966 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
11967 } else {
11968 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11969 Size = CGM.getSize(Sz.alignTo(Align));
11970 }
11971 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11972 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11973 const Expr *Allocator = AA->getAllocator();
11974 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11975 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11976 SmallVector<llvm::Value *, 4> Args;
11977 Args.push_back(ThreadID);
11978 if (Alignment)
11979 Args.push_back(Alignment);
11980 Args.push_back(Size);
11981 Args.push_back(AllocVal);
11982 llvm::omp::RuntimeFunction FnID =
11983 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11984 llvm::Value *Addr = CGF.EmitRuntimeCall(
11985 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11986 getName({CVD->getName(), ".void.addr"}));
11987 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11988 CGM.getModule(), OMPRTL___kmpc_free);
11989 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11990 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11991 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11992 if (UntiedAddr.isValid())
11993 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11994
11995 // Cleanup action for allocate support.
11996 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11997 llvm::FunctionCallee RTLFn;
11998 SourceLocation::UIntTy LocEncoding;
11999 Address Addr;
12000 const Expr *AllocExpr;
12001
12002 public:
12003 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12004 SourceLocation::UIntTy LocEncoding, Address Addr,
12005 const Expr *AllocExpr)
12006 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12007 AllocExpr(AllocExpr) {}
12008 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12009 if (!CGF.HaveInsertPoint())
12010 return;
12011 llvm::Value *Args[3];
12012 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12013 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12014 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12015 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
12016 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12017 Args[2] = AllocVal;
12018 CGF.EmitRuntimeCall(RTLFn, Args);
12019 }
12020 };
12021 Address VDAddr =
12022 UntiedRealAddr.isValid()
12023 ? UntiedRealAddr
12024 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12025 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12026 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12027 VDAddr, Allocator);
12028 if (UntiedRealAddr.isValid())
12029 if (auto *Region =
12030 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12031 Region->emitUntiedSwitch(CGF);
12032 return VDAddr;
12033 }
12034 return UntiedAddr;
12035}
12036
12037 bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12038 const VarDecl *VD) const {
12039 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12040 if (It == FunctionToUntiedTaskStackMap.end())
12041 return false;
12042 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12043}
12044
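/// RAII that pushes the set of declarations from the directive's
/// 'nontemporal' clauses onto the runtime's NontemporalDeclsStack for the
/// duration of a loop directive; the destructor pops the set again.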
12045 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12046 CodeGenModule &CGM, const OMPLoopDirective &S)
12047 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12048 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12049 if (!NeedToPush)
12050 return;
12051 NontemporalDeclsSet &DS =
12052 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12053 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12054 for (const Stmt *Ref : C->private_refs()) {
12055 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12056 const ValueDecl *VD;
12057 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12058 VD = DRE->getDecl();
12059 } else {
12060 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12061 assert((ME->isImplicitCXXThis() ||
12062 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12063 "Expected member of current class.");
12064 VD = ME->getMemberDecl();
12065 }
12066 DS.insert(VD);
12067 }
12068 }
12069}
12070
12071 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12072 if (!NeedToPush)
12073 return;
12074 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12075}
12076
12077 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12078 CodeGenFunction &CGF,
12079 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12080 std::pair<Address, Address>> &LocalVars)
12081 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12082 if (!NeedToPush)
12083 return;
12084 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12085 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12086 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12087}
12088
12089 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12090 if (!NeedToPush)
12091 return;
12092 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12093}
12094
12095 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12096 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12097
12098 return llvm::any_of(
12099 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12100 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12101}
12102
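/// Collects the declarations whose lastprivate conditional analysis must be
/// disabled inside this directive: variables captured by target/task regions
/// and scalar variables named in private, firstprivate, lastprivate,
/// reduction, or linear clauses that already have an active lastprivate
/// conditional registered in an enclosing region.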
12103void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12104 const OMPExecutableDirective &S,
12105 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12106 const {
12107 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12108 // Vars in target/task regions must be excluded completely.
12109 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12110 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12111 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12112 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12113 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12114 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12115 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12116 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12117 }
12118 }
12119 // Exclude vars in private clauses.
12120 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12121 for (const Expr *Ref : C->varlist()) {
12122 if (!Ref->getType()->isScalarType())
12123 continue;
12124 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12125 if (!DRE)
12126 continue;
12127 NeedToCheckForLPCs.insert(DRE->getDecl());
12128 }
12129 }
12130 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12131 for (const Expr *Ref : C->varlist()) {
12132 if (!Ref->getType()->isScalarType())
12133 continue;
12134 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12135 if (!DRE)
12136 continue;
12137 NeedToCheckForLPCs.insert(DRE->getDecl());
12138 }
12139 }
12140 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12141 for (const Expr *Ref : C->varlist()) {
12142 if (!Ref->getType()->isScalarType())
12143 continue;
12144 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12145 if (!DRE)
12146 continue;
12147 NeedToCheckForLPCs.insert(DRE->getDecl());
12148 }
12149 }
12150 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12151 for (const Expr *Ref : C->varlist()) {
12152 if (!Ref->getType()->isScalarType())
12153 continue;
12154 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12155 if (!DRE)
12156 continue;
12157 NeedToCheckForLPCs.insert(DRE->getDecl());
12158 }
12159 }
12160 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12161 for (const Expr *Ref : C->varlist()) {
12162 if (!Ref->getType()->isScalarType())
12163 continue;
12164 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12165 if (!DRE)
12166 continue;
12167 NeedToCheckForLPCs.insert(DRE->getDecl());
12168 }
12169 }
12170 for (const Decl *VD : NeedToCheckForLPCs) {
12171 for (const LastprivateConditionalData &Data :
12172 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12173 if (Data.DeclToUniqueName.count(VD) > 0) {
12174 if (!Data.Disabled)
12175 NeedToAddForLPCsAsDisabled.insert(VD);
12176 break;
12177 }
12178 }
12179 }
12180}
12181
12182CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12183 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12184 : CGM(CGF.CGM),
12185 Action((CGM.getLangOpts().OpenMP >= 50 &&
12186 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12187 [](const OMPLastprivateClause *C) {
12188 return C->getKind() ==
12189 OMPC_LASTPRIVATE_conditional;
12190 }))
12191 ? ActionToDo::PushAsLastprivateConditional
12192 : ActionToDo::DoNotPush) {
12193 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12194 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12195 return;
12196 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12197 "Expected a push action.");
12198 LastprivateConditionalData &Data =
12199 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12200 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12201 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12202 continue;
12203
12204 for (const Expr *Ref : C->varlist()) {
12205 Data.DeclToUniqueName.insert(std::make_pair(
12206 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12207 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12208 }
12209 }
12210 Data.IVLVal = IVLVal;
12211 Data.Fn = CGF.CurFn;
12212}
12213
12214CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12215 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12216 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12217 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12218 if (CGM.getLangOpts().OpenMP < 50)
12219 return;
12220 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12221 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12222 if (!NeedToAddForLPCsAsDisabled.empty()) {
12223 Action = ActionToDo::DisableLastprivateConditional;
12224 LastprivateConditionalData &Data =
12225 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12226 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12227 Data.DeclToUniqueName.try_emplace(VD);
12228 Data.Fn = CGF.CurFn;
12229 Data.Disabled = true;
12230 }
12231}
12232
12233CGOpenMPRuntime::LastprivateConditionalRAII
12234 CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12235 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12236 return LastprivateConditionalRAII(CGF, S);
12237}
12238
12239 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12240 if (CGM.getLangOpts().OpenMP < 50)
12241 return;
12242 if (Action == ActionToDo::DisableLastprivateConditional) {
12243 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12244 "Expected list of disabled private vars.");
12245 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12246 }
12247 if (Action == ActionToDo::PushAsLastprivateConditional) {
12248 assert(
12249 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12250 "Expected list of lastprivate conditional vars.");
12251 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12252 }
12253}
12254
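/// Builds the implicit struct { <var-type> Var; char Fired; } used to track a
/// lastprivate(conditional:) variable: 'Fired' starts at 0 and is set to 1
/// whenever the variable is assigned in an inner region, so the final update
/// can tell whether the private copy was ever written.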
12255 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12256 const VarDecl *VD) {
12257 ASTContext &C = CGM.getContext();
12258 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12259 QualType NewType;
12260 const FieldDecl *VDField;
12261 const FieldDecl *FiredField;
12262 LValue BaseLVal;
12263 auto VI = I->getSecond().find(VD);
12264 if (VI == I->getSecond().end()) {
12265 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12266 RD->startDefinition();
12267 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12268 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12269 RD->completeDefinition();
12270 NewType = C.getCanonicalTagType(RD);
12271 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12272 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12273 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12274 } else {
12275 NewType = std::get<0>(VI->getSecond());
12276 VDField = std::get<1>(VI->getSecond());
12277 FiredField = std::get<2>(VI->getSecond());
12278 BaseLVal = std::get<3>(VI->getSecond());
12279 }
12280 LValue FiredLVal =
12281 CGF.EmitLValueForField(BaseLVal, FiredField);
12282 CGF.EmitStoreOfScalar(
12283 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12284 FiredLVal);
12285 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
12286}
12287
12288namespace {
12289/// Checks if the lastprivate conditional variable is referenced in LHS.
12290class LastprivateConditionalRefChecker final
12291 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12292 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12293 const Expr *FoundE = nullptr;
12294 const Decl *FoundD = nullptr;
12295 StringRef UniqueDeclName;
12296 LValue IVLVal;
12297 llvm::Function *FoundFn = nullptr;
12298 SourceLocation Loc;
12299
12300public:
12301 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12302 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12303 llvm::reverse(LPM)) {
12304 auto It = D.DeclToUniqueName.find(E->getDecl());
12305 if (It == D.DeclToUniqueName.end())
12306 continue;
12307 if (D.Disabled)
12308 return false;
12309 FoundE = E;
12310 FoundD = E->getDecl()->getCanonicalDecl();
12311 UniqueDeclName = It->second;
12312 IVLVal = D.IVLVal;
12313 FoundFn = D.Fn;
12314 break;
12315 }
12316 return FoundE == E;
12317 }
12318 bool VisitMemberExpr(const MemberExpr *E) {
12319 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12320 return false;
12321 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12322 llvm::reverse(LPM)) {
12323 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12324 if (It == D.DeclToUniqueName.end())
12325 continue;
12326 if (D.Disabled)
12327 return false;
12328 FoundE = E;
12329 FoundD = E->getMemberDecl()->getCanonicalDecl();
12330 UniqueDeclName = It->second;
12331 IVLVal = D.IVLVal;
12332 FoundFn = D.Fn;
12333 break;
12334 }
12335 return FoundE == E;
12336 }
12337 bool VisitStmt(const Stmt *S) {
12338 for (const Stmt *Child : S->children()) {
12339 if (!Child)
12340 continue;
12341 if (const auto *E = dyn_cast<Expr>(Child))
12342 if (!E->isGLValue())
12343 continue;
12344 if (Visit(Child))
12345 return true;
12346 }
12347 return false;
12348 }
12349 explicit LastprivateConditionalRefChecker(
12350 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12351 : LPM(LPM) {}
12352 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12353 getFoundData() const {
12354 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12355 }
12356};
12357} // namespace
12358
12359 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12360 LValue IVLVal,
12361 StringRef UniqueDeclName,
12362 LValue LVal,
12363 SourceLocation Loc) {
12364 // Last updated loop counter for the lastprivate conditional var.
12365 // int<xx> last_iv = 0;
12366 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12367 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12368 LLIVTy, getName({UniqueDeclName, "iv"}));
12369 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12370 IVLVal.getAlignment().getAsAlign());
12371 LValue LastIVLVal =
12372 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
12373
12374 // Last value of the lastprivate conditional.
12375 // decltype(priv_a) last_a;
12376 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12377 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12378 cast<llvm::GlobalVariable>(Last)->setAlignment(
12379 LVal.getAlignment().getAsAlign());
12380 LValue LastLVal =
12381 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
12382
12383 // Global loop counter. Required to handle inner parallel-for regions.
12384 // iv
12385 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12386
12387 // #pragma omp critical(a)
12388 // if (last_iv <= iv) {
12389 // last_iv = iv;
12390 // last_a = priv_a;
12391 // }
12392 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12393 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12394 Action.Enter(CGF);
12395 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12396 // Check whether the variable was updated in this or a later iteration
12397 // (last_iv <= iv) and, if so, store the new value in the global var.
12398 llvm::Value *CmpRes;
12399 if (IVLVal.getType()->isSignedIntegerType()) {
12400 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12401 } else {
12402 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12403 "Loop iteration variable must be integer.");
12404 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12405 }
12406 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12407 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12408 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12409 // {
12410 CGF.EmitBlock(ThenBB);
12411
12412 // last_iv = iv;
12413 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12414
12415 // last_a = priv_a;
12416 switch (CGF.getEvaluationKind(LVal.getType())) {
12417 case TEK_Scalar: {
12418 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12419 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12420 break;
12421 }
12422 case TEK_Complex: {
12423 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12424 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12425 break;
12426 }
12427 case TEK_Aggregate:
12428 llvm_unreachable(
12429 "Aggregates are not supported in lastprivate conditional.");
12430 }
12431 // }
12432 CGF.EmitBranch(ExitBB);
12433 // There is no need to emit a line number for the unconditional branch.
12434 (void)ApplyDebugLocation::CreateEmpty(CGF);
12435 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12436 };
12437
12438 if (CGM.getLangOpts().OpenMPSimd) {
12439 // Do not emit as a critical region as no parallel region could be emitted.
12440 RegionCodeGenTy ThenRCG(CodeGen);
12441 ThenRCG(CGF);
12442 } else {
12443 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12444 }
12445}
12446
12447 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12448 const Expr *LHS) {
12449 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12450 return;
12451 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12452 if (!Checker.Visit(LHS))
12453 return;
12454 const Expr *FoundE;
12455 const Decl *FoundD;
12456 StringRef UniqueDeclName;
12457 LValue IVLVal;
12458 llvm::Function *FoundFn;
12459 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12460 Checker.getFoundData();
12461 if (FoundFn != CGF.CurFn) {
12462 // Special codegen for inner parallel regions.
12463 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12464 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12465 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12466 "Lastprivate conditional is not found in outer region.");
12467 QualType StructTy = std::get<0>(It->getSecond());
12468 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12469 LValue PrivLVal = CGF.EmitLValue(FoundE);
12470 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12471 PrivLVal.getAddress(),
12472 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12473 CGF.ConvertTypeForMem(StructTy));
12474 LValue BaseLVal =
12475 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12476 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12477 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12478 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12479 FiredLVal, llvm::AtomicOrdering::Unordered,
12480 /*IsVolatile=*/true, /*isInit=*/false);
12481 return;
12482 }
12483
12484 // Private address of the lastprivate conditional in the current context.
12485 // priv_a
12486 LValue LVal = CGF.EmitLValue(FoundE);
12487 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12488 FoundE->getExprLoc());
12489}
12490
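/// For each lastprivate conditional registered for the current function and
/// captured by the directive, tests the 'Fired' flag and, when it is set,
/// emits the conditional update of the shared copy.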
12491 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12492 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12493 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12494 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12495 return;
12496 auto Range = llvm::reverse(LastprivateConditionalStack);
12497 auto It = llvm::find_if(
12498 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12499 if (It == Range.end() || It->Fn != CGF.CurFn)
12500 return;
12501 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12502 assert(LPCI != LastprivateConditionalToTypes.end() &&
12503 "Lastprivates must be registered already.");
12504 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12505 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12506 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12507 for (const auto &Pair : It->DeclToUniqueName) {
12508 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12509 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12510 continue;
12511 auto I = LPCI->getSecond().find(Pair.first);
12512 assert(I != LPCI->getSecond().end() &&
12513 "Lastprivate must be registered already.");
12514 // bool Cmp = priv_a.Fired != 0;
12515 LValue BaseLVal = std::get<3>(I->getSecond());
12516 LValue FiredLVal =
12517 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12518 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12519 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12520 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12521 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12522 // if (Cmp) {
12523 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12524 CGF.EmitBlock(ThenBB);
12525 Address Addr = CGF.GetAddrOfLocalVar(VD);
12526 LValue LVal;
12527 if (VD->getType()->isReferenceType())
12528 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12529 AlignmentSource::Decl);
12530 else
12531 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12532 AlignmentSource::Decl);
12533 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12534 D.getBeginLoc());
12535 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12536 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12537 // }
12538 }
12539}
12540
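/// Final copy-back for a lastprivate conditional: if the tracking global for
/// VD exists (that is, the variable was updated somewhere in the region), its
/// value is loaded and stored back into the private copy.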
12541 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12542 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12543 SourceLocation Loc) {
12544 if (CGF.getLangOpts().OpenMP < 50)
12545 return;
12546 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12547 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12548 "Unknown lastprivate conditional variable.");
12549 StringRef UniqueName = It->second;
12550 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12551 // The variable was not updated in the region - exit.
12552 if (!GV)
12553 return;
12554 LValue LPLVal = CGF.MakeRawAddrLValue(
12555 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12556 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12557 CGF.EmitStoreOfScalar(Res, PrivLVal);
12558}
12559
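// CGOpenMPSIMDRuntime backs the -fopenmp-simd mode, in which only 'simd'
// constructs are honored. Apart from simple reductions, which are forwarded
// to the base implementation, none of the entry points below can be reached,
// so each one traps with llvm_unreachable.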
12560 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12561 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12562 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12563 const RegionCodeGenTy &CodeGen) {
12564 llvm_unreachable("Not supported in SIMD-only mode");
12565}
12566
12567 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12568 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12569 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12570 const RegionCodeGenTy &CodeGen) {
12571 llvm_unreachable("Not supported in SIMD-only mode");
12572}
12573
12574 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12575 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12576 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12577 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12578 bool Tied, unsigned &NumberOfParts) {
12579 llvm_unreachable("Not supported in SIMD-only mode");
12580}
12581
12582 void CGOpenMPSIMDRuntime::emitParallelCall(
12583 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
12584 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
12585 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
12586 OpenMPSeverityClauseKind Severity, const Expr *Message) {
12587 llvm_unreachable("Not supported in SIMD-only mode");
12588}
12589
12590 void CGOpenMPSIMDRuntime::emitCriticalRegion(
12591 CodeGenFunction &CGF, StringRef CriticalName,
12592 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12593 const Expr *Hint) {
12594 llvm_unreachable("Not supported in SIMD-only mode");
12595}
12596
12597 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12598 const RegionCodeGenTy &MasterOpGen,
12599 SourceLocation Loc) {
12600 llvm_unreachable("Not supported in SIMD-only mode");
12601}
12602
12603 void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12604 const RegionCodeGenTy &MasterOpGen,
12605 SourceLocation Loc,
12606 const Expr *Filter) {
12607 llvm_unreachable("Not supported in SIMD-only mode");
12608}
12609
12610 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12611 SourceLocation Loc) {
12612 llvm_unreachable("Not supported in SIMD-only mode");
12613}
12614
12615 void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12616 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12617 SourceLocation Loc) {
12618 llvm_unreachable("Not supported in SIMD-only mode");
12619}
12620
12621 void CGOpenMPSIMDRuntime::emitSingleRegion(
12622 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12623 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12624 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12625 ArrayRef<const Expr *> AssignmentOps) {
12626 llvm_unreachable("Not supported in SIMD-only mode");
12627}
12628
12629 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12630 const RegionCodeGenTy &OrderedOpGen,
12631 SourceLocation Loc,
12632 bool IsThreads) {
12633 llvm_unreachable("Not supported in SIMD-only mode");
12634}
12635
12636 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12637 SourceLocation Loc,
12638 OpenMPDirectiveKind Kind,
12639 bool EmitChecks,
12640 bool ForceSimpleCall) {
12641 llvm_unreachable("Not supported in SIMD-only mode");
12642}
12643
12644 void CGOpenMPSIMDRuntime::emitForDispatchInit(
12645 CodeGenFunction &CGF, SourceLocation Loc,
12646 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12647 bool Ordered, const DispatchRTInput &DispatchValues) {
12648 llvm_unreachable("Not supported in SIMD-only mode");
12649}
12650
12651 void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
12652 SourceLocation Loc) {
12653 llvm_unreachable("Not supported in SIMD-only mode");
12654}
12655
12656 void CGOpenMPSIMDRuntime::emitForStaticInit(
12657 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12658 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12659 llvm_unreachable("Not supported in SIMD-only mode");
12660}
12661
12662 void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12663 CodeGenFunction &CGF, SourceLocation Loc,
12664 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12665 llvm_unreachable("Not supported in SIMD-only mode");
12666}
12667
12668 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12669 SourceLocation Loc,
12670 unsigned IVSize,
12671 bool IVSigned) {
12672 llvm_unreachable("Not supported in SIMD-only mode");
12673}
12674
12675 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12676 SourceLocation Loc,
12677 OpenMPDirectiveKind DKind) {
12678 llvm_unreachable("Not supported in SIMD-only mode");
12679}
12680
12681 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12682 SourceLocation Loc,
12683 unsigned IVSize, bool IVSigned,
12684 Address IL, Address LB,
12685 Address UB, Address ST) {
12686 llvm_unreachable("Not supported in SIMD-only mode");
12687}
12688
12689 void CGOpenMPSIMDRuntime::emitNumThreadsClause(
12690 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
12691 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
12692 SourceLocation SeverityLoc, const Expr *Message,
12693 SourceLocation MessageLoc) {
12694 llvm_unreachable("Not supported in SIMD-only mode");
12695}
12696
12697 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12698 ProcBindKind ProcBind,
12699 SourceLocation Loc) {
12700 llvm_unreachable("Not supported in SIMD-only mode");
12701}
12702
12703 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12704 const VarDecl *VD,
12705 Address VDAddr,
12706 SourceLocation Loc) {
12707 llvm_unreachable("Not supported in SIMD-only mode");
12708}
12709
12710 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12711 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12712 CodeGenFunction *CGF) {
12713 llvm_unreachable("Not supported in SIMD-only mode");
12714}
12715
12716 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12717 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12718 llvm_unreachable("Not supported in SIMD-only mode");
12719}
12720
12721 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12722 ArrayRef<const Expr *> Vars,
12723 SourceLocation Loc,
12724 llvm::AtomicOrdering AO) {
12725 llvm_unreachable("Not supported in SIMD-only mode");
12726}
12727
12728 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12729 const OMPExecutableDirective &D,
12730 llvm::Function *TaskFunction,
12731 QualType SharedsTy, Address Shareds,
12732 const Expr *IfCond,
12733 const OMPTaskDataTy &Data) {
12734 llvm_unreachable("Not supported in SIMD-only mode");
12735}
12736
12737 void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12738 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12739 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12740 const Expr *IfCond, const OMPTaskDataTy &Data) {
12741 llvm_unreachable("Not supported in SIMD-only mode");
12742}
12743
12744 void CGOpenMPSIMDRuntime::emitReduction(
12745 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12746 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12747 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12748 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12749 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12750 ReductionOps, Options);
12751}
12752
12753 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12754 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12755 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12756 llvm_unreachable("Not supported in SIMD-only mode");
12757}
12758
12759 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12760 SourceLocation Loc,
12761 bool IsWorksharingReduction) {
12762 llvm_unreachable("Not supported in SIMD-only mode");
12763}
12764
12765 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12766 SourceLocation Loc,
12767 ReductionCodeGen &RCG,
12768 unsigned N) {
12769 llvm_unreachable("Not supported in SIMD-only mode");
12770}
12771
12772 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12773 SourceLocation Loc,
12774 llvm::Value *ReductionsPtr,
12775 LValue SharedLVal) {
12776 llvm_unreachable("Not supported in SIMD-only mode");
12777}
12778
12779 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12780 SourceLocation Loc,
12781 const OMPTaskDataTy &Data) {
12782 llvm_unreachable("Not supported in SIMD-only mode");
12783}
12784
12785 void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12786 CodeGenFunction &CGF, SourceLocation Loc,
12787 OpenMPDirectiveKind CancelRegion) {
12788 llvm_unreachable("Not supported in SIMD-only mode");
12789}
12790
12791 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12792 SourceLocation Loc, const Expr *IfCond,
12793 OpenMPDirectiveKind CancelRegion) {
12794 llvm_unreachable("Not supported in SIMD-only mode");
12795}
12796
12797 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12798 const OMPExecutableDirective &D, StringRef ParentName,
12799 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12800 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12801 llvm_unreachable("Not supported in SIMD-only mode");
12802}
12803
12804 void CGOpenMPSIMDRuntime::emitTargetCall(
12805 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12806 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12807 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12808 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12809 const OMPLoopDirective &D)>
12810 SizeEmitter) {
12811 llvm_unreachable("Not supported in SIMD-only mode");
12812}
12813
12814 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12815 llvm_unreachable("Not supported in SIMD-only mode");
12816}
12817
12818 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12819 llvm_unreachable("Not supported in SIMD-only mode");
12820}
12821
12822 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12823 return false;
12824}
12825
12826 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12827 const OMPExecutableDirective &D,
12828 SourceLocation Loc,
12829 llvm::Function *OutlinedFn,
12830 ArrayRef<llvm::Value *> CapturedVars) {
12831 llvm_unreachable("Not supported in SIMD-only mode");
12832}
12833
12834 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12835 const Expr *NumTeams,
12836 const Expr *ThreadLimit,
12837 SourceLocation Loc) {
12838 llvm_unreachable("Not supported in SIMD-only mode");
12839}
12840
12841 void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12842 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12843 const Expr *Device, const RegionCodeGenTy &CodeGen,
12844 CGOpenMPRuntime::TargetDataInfo &Info) {
12845 llvm_unreachable("Not supported in SIMD-only mode");
12846}
12847
12848 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12849 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12850 const Expr *Device) {
12851 llvm_unreachable("Not supported in SIMD-only mode");
12852}
12853
12854 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12855 const OMPLoopDirective &D,
12856 ArrayRef<Expr *> NumIterations) {
12857 llvm_unreachable("Not supported in SIMD-only mode");
12858}
12859
12860 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12861 const OMPDependClause *C) {
12862 llvm_unreachable("Not supported in SIMD-only mode");
12863}
12864
12865 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12866 const OMPDoacrossClause *C) {
12867 llvm_unreachable("Not supported in SIMD-only mode");
12868}
12869
12870const VarDecl *
12871 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12872 const VarDecl *NativeParam) const {
12873 llvm_unreachable("Not supported in SIMD-only mode");
12874}
12875
12876Address
12877 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12878 const VarDecl *NativeParam,
12879 const VarDecl *TargetParam) const {
12880 llvm_unreachable("Not supported in SIMD-only mode");
12881}
#define V(N, I)
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static std::pair< llvm::Value *, OMPDynGroupprivateFallbackType > emitDynCGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
Expr::Classification Cl
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:220
SourceManager & getSourceManager()
Definition ASTContext.h:845
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:938
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static bool hasSameType(QualType T1, QualType T2)
Determine whether the given types T1 and T2 are equivalent.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5266
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3722
Attr - This represents one attribute.
Definition Attr.h:45
Represents a C++ constructor within a class.
Definition DeclCXX.h:2604
Represents a C++ destructor within a class.
Definition DeclCXX.h:2869
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2255
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2279
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1784
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1097
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2121
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3879
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3913
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1353
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:3919
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3907
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3910
This captures a statement into a function.
Definition Stmt.h:3866
const Capture * const_capture_iterator
Definition Stmt.h:4000
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4017
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:3987
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:3970
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1479
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4012
capture_range captures()
Definition Stmt.h:4004
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:140
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:296
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:207
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:245
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:112
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:369
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:282
Address CreateAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:193
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits the address of the word in memory where the current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next(ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
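For orientation, the protocol behind this entry point is a chunk-retrieval loop on the runtime side. Below is a minimal sketch for the 32-bit signed case (the __kmpc_dispatch_next_4 entry point in the LLVM OpenMP runtime); the driver function around it is illustrative only, not what clang literally emits, and ident_t is treated as opaque here.

// Hedged sketch: the chunk-retrieval loop driven by __kmpc_dispatch_next_4.
#include <cstdint>
typedef int32_t kmp_int32;
struct ident_t; // source-location descriptor; opaque for this sketch
extern "C" kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,
                                            kmp_int32 *p_last, kmp_int32 *p_lb,
                                            kmp_int32 *p_ub, kmp_int32 *p_st);
void run_chunks(ident_t *loc, kmp_int32 gtid) {
  kmp_int32 last, lb, ub, st;
  // Each successful call hands this thread one chunk [lb, ub] with stride st;
  // a zero return means no chunks remain for this loop.
  while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st))
    for (kmp_int32 i = lb; i <= ub; i += st)
      ; // loop body goes here
}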
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record whose ad...
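A rough picture of the "parallel or serial" split: on the host, the parallel path goes through __kmpc_fork_call, while the serial (if-clause-false) path brackets a direct call with __kmpc_serialized_parallel / __kmpc_end_serialized_parallel. These entry points exist in the LLVM OpenMP runtime, but the wrapper below is a simplified sketch; the captured-variable record is reduced to a single pointer and the names outlined/captured are placeholders.

// Hedged sketch of the two lowering paths behind emitParallelCall.
#include <cstdint>
typedef int32_t kmp_int32;
struct ident_t;
typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);
extern "C" {
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...);
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid);
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid);
}
void parallel_region(ident_t *loc, kmp_int32 gtid, bool if_cond,
                     kmpc_micro outlined, void *captured) {
  if (if_cond) {
    // Parallel path: the runtime forks a team and invokes the outlined body.
    __kmpc_fork_call(loc, /*argc=*/1, outlined, captured);
  } else {
    // Serial path: run the outlined body on the current thread.
    kmp_int32 bound = 0;
    __kmpc_serialized_parallel(loc, gtid);
    outlined(&gtid, &bound, captured);
    __kmpc_end_serialized_parallel(loc, gtid);
  }
}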
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation())
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
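For reference, the baseline three-argument form of this entry point in the LLVM OpenMP runtime is sketched below; the newer variants that also carry severity and message information are omitted here rather than guessed at.

// Hedged sketch: the classic entry point behind emitNumThreadsClause.
#include <cstdint>
typedef int32_t kmp_int32;
struct ident_t;
extern "C" void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                                        kmp_int32 num_threads);
// Conceptually, for '#pragma omp parallel num_threads(n)' the generated code
// performs __kmpc_push_num_threads(loc, gtid, n) just before forking the
// parallel region.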
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address (of void * type) of the private copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits a call to the Callee function with arguments Args at location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
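The ident_t layout populated here follows the LLVM OpenMP runtime's kmp.h; the reconstruction below matches that well-known layout, but the reserved fields should be treated as runtime-internal. Note the tie-in with getDefaultLocationReserved2Flags (documented below), which supplies target-specific bits for reserved_2.

// Hedged reconstruction of ident_t as laid out in the LLVM OpenMP runtime.
#include <cstdint>
typedef int32_t kmp_int32;
typedef struct ident {
  kmp_int32 reserved_1; // runtime-internal
  kmp_int32 flags;      // KMP_IDENT_* flags (the Flags parameter above)
  kmp_int32 reserved_2; // target-specific; see getDefaultLocationReserved2Flags
  kmp_int32 reserved_3; // runtime-internal
  char const *psource;  // ";file;function;line;column;;" location string
} ident_t;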
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record whose address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit code for the initialization of a threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /**< pointer to block of pointers to shared vars */ k...
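Unflattened, the truncated comment above describes a task descriptor of roughly the following shape; the fields after shareds are reassembled from where the text cuts off, and later runtime versions append further fields, so take this as a sketch rather than the authoritative layout.

// Hedged reconstruction of kmp_task_t from the comment above.
#include <cstdint>
typedef int32_t kmp_int32;
typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *); // KmpRoutineEntryPtrTy
typedef struct kmp_task {
  void *shareds;               // pointer to block of pointers to shared vars
  kmp_routine_entry_t routine; // routine to call for executing the task
  kmp_int32 part_id;           // part id for the task
} kmp_task_t;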
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument based on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
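Reassembled from the truncated comment, the affinity record looks roughly like this; the flag-bit names come straight from the comment, their semantics are runtime-internal, and everything past flag1 (where the text cuts off) is inferred.

// Hedged reconstruction of kmp_task_affinity_info_t from the comment above.
#include <cstddef>
#include <cstdint>
typedef int32_t kmp_int32;
typedef intptr_t kmp_intptr_t;
typedef struct kmp_task_affinity_info {
  kmp_intptr_t base_addr; // start of the affinity range
  size_t len;             // length of the range
  struct {
    bool flag1 : 1;       // semantics runtime-internal
    bool flag2 : 1;       // inferred past the cutoff
    kmp_int32 reserved : 30;
  } flags;
} kmp_task_affinity_info_t;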
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message, SourceLocation Loc)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit code for the initialization of the task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive: extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive for which this barrier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with the constructs 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with the current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits a list of dependencies based on the provided data (array of dependence/expression pairs) for depob...
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity, SourceLocation Loc)
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before the start of the loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info cast to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
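The flattened comment above reads more clearly as a struct. The lo and up fields are visible in the text, while the trailing stride field is an inference from how doacross loop bounds are usually passed, so treat it as an assumption.

// Hedged reconstruction of kmp_dim from the flattened comment above.
#include <cstdint>
typedef int64_t kmp_int64;
struct kmp_dim {  // loop bounds info, cast to kmp_int64
  kmp_int64 lo;   // lower bound
  kmp_int64 up;   // upper bound
  kmp_int64 st;   // stride (assumed; the comment is cut off here)
};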
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit code for the reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits a list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
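Reassembled from the truncated comment, a dependence record carries an address, a length, and per-kind flag bits; the text cuts off inside the flags struct, so the completion of the 'out' bit and anything after it are inferred.

// Hedged reconstruction of kmp_depend_info_t from the comment above.
#include <cstddef>
#include <cstdint>
typedef intptr_t kmp_intptr_t;
typedef struct kmp_depend_info {
  kmp_intptr_t base_addr; // address the dependence refers to
  size_t len;             // size of the dependence range
  struct {
    bool in : 1;          // 'in' dependence
    bool out : 1;         // 'out'/'inout' dependence (inferred past the cutoff)
  } flags;
} kmp_depend_info_t;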
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before the start of the loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address (of void * type) of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before the start of the loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next(ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for the initialization of a threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record whose ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit code for the reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record whose address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with the current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for the initialization of the task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, SourceLocation SeverityLoc=SourceLocation(), const Expr *Message=nullptr, SourceLocation MessageLoc=SourceLocation()) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers the LocalVD variable as private, with Addr as the address of the corresponding private variabl...
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3071
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2394
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1482
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2278
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3080
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2251
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5293
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:176
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:244
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2377
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:4876
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:225
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5467
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2202
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2574
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3090
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:295
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1552
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:672
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:188
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
Definition CGExpr.cpp:3038
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:203
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1610
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
LValue EmitMemberExpr(const MemberExpr *E)
Definition CGExpr.cpp:5108
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1668
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVariablyModifiedType - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1807
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:652
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2092
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
const IntrusiveRefCntPtr< llvm::vfs::FileSystem > & getFileSystem() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2944
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition CGCall.cpp:1702
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:740
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:639
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375
LValue - This represents an lvalue reference.
Definition CGValue.h:182
CharUnits getAlignment() const
Definition CGValue.h:343
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:338
Address getAddress() const
Definition CGValue.h:361
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:346
QualType getType() const
Definition CGValue.h:291
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:335
A basic class for pre/post-actions in an advanced codegen sequence for an OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:98
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:71
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kind of the reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr to be used instead of the original variable address in normal operations.
Class that provides a way to call the simple version of codegen for an OpenMP region, or an advanced one with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1449
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1270
ValueDecl * getDecl()
Definition Expr.h:1338
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:573
bool hasAttrs() const
Definition DeclBase.h:518
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:546
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method definition, this returns the top-level Stmt* of that body.
Definition DeclBase.h:1087
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:559
SourceLocation getLocation() const
Definition DeclBase.h:439
DeclContext * getDeclContext()
Definition DeclBase.h:448
AttrVec & getAttrs()
Definition DeclBase.h:524
bool hasAttr() const
Definition DeclBase.h:577
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:978
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:831
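A sketch of the attribute-query idiom built from the members above, assuming D is a const Decl* already in scope:
if (D->hasAttr<OMPDeclareTargetDeclAttr>()) {
  const auto *A = D->getAttr<OMPDeclareTargetDeclAttr>();
  (void)A; // inspect the device mapping kind, etc.
}
for (const auto *AA : D->specific_attrs<OMPAllocateDeclAttr>())
  (void)AA; // iterates only attributes of the requested type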
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition Diagnostic.h:905
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
Expr * IgnoreParenNoopCasts(const ASTContext &Ctx) LLVM_READONLY
Skip past any parentheses and casts which do not change the value (including ptr->int casts of the same size).
Definition Expr.cpp:3115
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:674
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:672
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3093
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fixed point.
Definition Expr.cpp:3088
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-effects.
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than producing a value.
Definition Expr.cpp:3668
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boolean condition.
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic expression.
Definition Expr.cpp:276
static bool isSameComparisonOperand(const Expr *E1, const Expr *E2)
Checks that the two Expr's will refer to the same value as a comparison operand.
Definition Expr.cpp:4297
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4033
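A sketch of the usual constant-folding sequence these Expr helpers form, assuming E is a const Expr* and Ctx an ASTContext&:
const Expr *Core = E->IgnoreParenImpCasts(); // strip parens/implicit casts
if (std::optional<llvm::APSInt> V = Core->getIntegerConstantExpr(Ctx)) {
  // *V holds the integer-constant-expression value
} else if (Core->isEvaluatable(Ctx)) {
  llvm::APSInt Folded = Core->EvaluateKnownConstInt(Ctx);
  (void)Folded; // constant-foldable without side effects
}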
Represents a member of a struct/union/class.
Definition Decl.h:3160
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::getFieldOffset.
Definition Decl.h:3245
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3396
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4689
Represents a function declaration or definition.
Definition Decl.h:2000
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2797
QualType getReturnType() const
Definition Decl.h:2845
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2774
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3736
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3815
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
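A sketch of walking a function with the members above, assuming FD is a const FunctionDecl*:
QualType RetTy = FD->getReturnType();
(void)RetTy;
for (unsigned I = 0, E = FD->getNumParams(); I != E; ++I) {
  const ParmVarDecl *P = FD->getParamDecl(I);
  (void)P->getType();
}
const FunctionDecl *Canon = FD->getCanonicalDecl(); // merge point for redecls
(void)Canon;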
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5522
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:974
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3298
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3381
Expr * getBase() const
Definition Expr.h:3375
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:301
bool isExternallyVisible() const
Definition Decl.h:433
This represents clause 'affinity' in the '#pragma omp task'-based directives.
ArrayRef< MappableComponent > MappableExprComponentListRef
static std::pair< const Expr *, std::optional< size_t > > findAttachPtrExpr(MappableExprComponentListRef Components, OpenMPDirectiveKind CurDirKind)
Find the attach pointer expression from a list of mappable expression components.
static QualType getComponentExprElementType(const Expr *Exp)
Get the type of an element of a ComponentList Expr Exp.
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing single OpenMP clause.
This represents '#pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents '#pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This represents 'dyn_groupprivate' clause in '#pragma omp target ...' and '#pragma omp teams ...' directives.
This represents 'if' clause in the '#pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
This represents clause 'in_reduction' in the '#pragma omp task' directives.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5474
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents 'threadset' clause in the '#pragma omp task ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1178
Represents a parameter to a function.
Definition Decl.h:1790
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3328
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1172
QualType withRestrict() const
Definition TypeBase.h:1175
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8278
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8318
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const int").
Definition TypeBase.h:8463
QualType getCanonicalType() const
Definition TypeBase.h:8330
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1545
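A sketch combining the QualType queries above, assuming Ty is a QualType in scope:
if (!Ty.isNull()) {
  QualType NonRef = Ty.getNonReferenceType(); // peel reference types
  QualType Canon = NonRef.getCanonicalType(); // canonicalize for comparisons
  if (Canon.isDestructedType() != QualType::DK_none) {
    // objects of this type need explicit cleanup codegen
  }
}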
Represents a struct/union/class.
Definition Decl.h:4312
field_iterator field_end() const
Definition Decl.h:4518
field_range fields() const
Definition Decl.h:4515
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5218
bool field_empty() const
Definition Decl.h:4523
field_iterator field_begin() const
Definition Decl.cpp:5202
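A hedged sketch of the implicit-record idiom these members support; C is an ASTContext&, and addFieldToRecordDecl stands for the field-adding helper used in this file:
RecordDecl *RD = C.buildImplicitRecord("kmp_example_t"); // name is illustrative
RD->startDefinition();
addFieldToRecordDecl(C, RD, C.VoidPtrTy); // add one void* field
RD->completeDefinition();
if (!RD->field_empty())
  for (const FieldDecl *FD : RD->fields())
    (void)FD->getFieldIndex(); // stable index for layout queries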
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
Stmt - This represents one statement.
Definition Stmt.h:85
child_range children()
Definition Stmt.cpp:299
StmtClass getStmtClass() const
Definition Stmt.h:1472
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpreted by SourceManager.
Definition Stmt.cpp:338
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top, if IgnoreCaptured is true.
Definition Stmt.cpp:205
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:350
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4895
bool isUnion() const
Definition Decl.h:3922
The base class of the type hierarchy.
Definition TypeBase.h:1833
bool isVoidType() const
Definition TypeBase.h:8871
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type; if this is an array type, return the array element type.
Definition TypeBase.h:9051
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long, ...], or an enum decl which has a signed representation.
Definition Type.cpp:2205
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or because it is the injected-class-name type of a class template.
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8614
bool isPointerType() const
Definition TypeBase.h:8515
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8915
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9158
bool isReferenceType() const
Definition TypeBase.h:8539
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:752
bool isLValueReferenceType() const
Definition TypeBase.h:8543
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2411
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3119
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9044
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2800
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9144
bool isFloatingType() const
Definition Type.cpp:2304
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true for _Bool], or an enum decl which has an unsigned representation.
Definition Type.cpp:2253
bool isAnyPointerType() const
Definition TypeBase.h:8523
const T * getAs() const
Member-template getAs<specific type>.
Definition TypeBase.h:9091
bool isRecordType() const
Definition TypeBase.h:8642
bool isUnionType() const
Definition Type.cpp:718
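A sketch chaining the Type predicates above when classifying an operand, assuming Ty is a QualType:
if (Ty->isAnyPointerType()) {
  QualType Pointee = Ty->getPointeeType();
  (void)Pointee;
} else if (Ty->isArrayType()) {
  const Type *Elt = Ty->getBaseElementTypeUnsafe(); // strip all array levels
  (void)Elt;
} else if (RecordDecl *RD = Ty->getAsRecordDecl()) {
  (void)RD->isUnion(); // distinguish unions from other records
}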
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it is a function designator), or an enum constant.
Definition Decl.h:712
QualType getType() const
Definition Decl.h:723
Represents a variable declaration or definition.
Definition Decl.h:926
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2257
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2366
const Expr * getInit() const
Definition Decl.h:1368
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1217
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1295
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2375
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1262
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1358
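A sketch of the definition/initializer checks above, assuming VD is a const VarDecl* and Ctx an ASTContext&:
const VarDecl *Canon = VD->getCanonicalDecl();
if (Canon->hasDefinition(Ctx) == VarDecl::DeclarationOnly) {
  // extern declaration only: nothing to emit in this TU
} else if (const Expr *Init = Canon->getAnyInitializer()) {
  (void)Init; // emit the initialization
}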
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:3966
Expr * getSizeExpr() const
Definition TypeBase.h:3980
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are of a specific type.
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRecordForLayout) and fields that are empty (per isEmptyFieldForLayout).
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:154
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:145
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field or a field of an empty record type.
ComparisonResult
Indicates the result of a tentative comparison.
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
bool isa(CodeGen::Address addr)
Definition Address.h:330
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
@ Conditional
A conditional (?:) operator.
Definition Sema.h:667
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constructs.
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:124
This class represents a compute construct, representing a 'Kind' of 'parallel', 'serial',...
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
static bool classof(const Stmt *T)
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of the tasking directives - task, taskloop, or related taskloop variants.
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ OMPC_DYN_GROUPPRIVATE_FALLBACK_unknown
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
const FunctionProtoType * T
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:5886
bool isOpenMPTargetMapEnteringDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a map-entering target directive.
@ Type
The name was classified as a type.
Definition Sema.h:562
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:139
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
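A sketch of querying capture regions for a combined directive; the exact contents depend on the directive kind:
llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
getOpenMPCaptureRegions(CaptureRegions, OMPD_target_parallel);
for (OpenMPDirectiveKind CR : CaptureRegions)
  (void)CR; // one entry per nested CapturedStmt level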
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1746
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
OpenMPThreadsetKind
OpenMP modifiers for 'threadset' clause.
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
#define false
Definition stdbool.h:26
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, needed to generate the statically scheduled loop.
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
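A hedged sketch of filling this bundle: the field names come from the listing above, while the constructor shape and the emitForStaticInit call are assumptions about the surrounding runtime API (RT, CGF, Loc, ScheduleKind and the four Address values are placeholders):
CGOpenMPRuntime::StaticRTInput Values(/*IVSize=*/32, /*IVSigned=*/true,
                                      /*Ordered=*/false, IL, LB, UB, ST,
                                      /*Chunk=*/nullptr);
RT.emitForStaticInit(CGF, Loc, OMPD_for, ScheduleKind, Values);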
A jump destination is an abstract label, branching to which may require a jump out through normal cleanups.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:645
Extra information about a function prototype.
Definition TypeBase.h:5339
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;.
Definition ExprOpenMP.h:121
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Scheduling data for loop-based OpenMP directives.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
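A sketch of populating the schedule descriptor; the enumerators are the ones documented in OpenMPKinds.h:
OpenMPScheduleTy ScheduleKind; // members default to the *_unknown enumerators
ScheduleKind.Schedule = OMPC_SCHEDULE_static;
ScheduleKind.M1 = OMPC_SCHEDULE_MODIFIER_unknown;
ScheduleKind.M2 = OMPC_SCHEDULE_MODIFIER_unknown;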
Describes how types, statements, expressions, and declarations should be printed.