clang 19.0.0git
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "CGCXXABI.h"
15#include "CGCleanup.h"
16#include "CGRecordLayout.h"
17#include "CodeGenFunction.h"
18#include "TargetInfo.h"
19#include "clang/AST/APValue.h"
20#include "clang/AST/Attr.h"
21#include "clang/AST/Decl.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/SetOperations.h"
32#include "llvm/ADT/SmallBitVector.h"
33#include "llvm/ADT/StringExtras.h"
34#include "llvm/Bitcode/BitcodeReader.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/GlobalValue.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Value.h"
40#include "llvm/Support/AtomicOrdering.h"
41#include "llvm/Support/Format.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <cstdint>
45#include <numeric>
46#include <optional>
47
48using namespace clang;
49using namespace CodeGen;
50using namespace llvm::omp;
51
52namespace {
53/// Base class for handling code generation inside OpenMP regions.
54class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
55public:
56 /// Kinds of OpenMP regions used in codegen.
57 enum CGOpenMPRegionKind {
58 /// Region with outlined function for standalone 'parallel'
59 /// directive.
60 ParallelOutlinedRegion,
61 /// Region with outlined function for standalone 'task' directive.
62 TaskOutlinedRegion,
63 /// Region for constructs that do not require function outlining,
64 /// like 'for', 'sections', 'atomic' etc. directives.
65 InlinedRegion,
66 /// Region with outlined function for standalone 'target' directive.
67 TargetRegion,
68 };
69
70 CGOpenMPRegionInfo(const CapturedStmt &CS,
71 const CGOpenMPRegionKind RegionKind,
72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
73 bool HasCancel)
74 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
75 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
76
77 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
78 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
79 bool HasCancel)
80 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
81 Kind(Kind), HasCancel(HasCancel) {}
82
83 /// Get a variable or parameter for storing global thread id
84 /// inside OpenMP construct.
85 virtual const VarDecl *getThreadIDVariable() const = 0;
86
87 /// Emit the captured statement body.
88 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
89
90 /// Get an LValue for the current ThreadID variable.
91 /// \return LValue for thread id variable. This LValue always has type int32*.
92 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
93
94 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
95
96 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
97
98 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
99
100 bool hasCancel() const { return HasCancel; }
101
102 static bool classof(const CGCapturedStmtInfo *Info) {
103 return Info->getKind() == CR_OpenMP;
104 }
105
106 ~CGOpenMPRegionInfo() override = default;
107
108protected:
109 CGOpenMPRegionKind RegionKind;
110 RegionCodeGenTy CodeGen;
112 bool HasCancel;
113};
114
/// API for captured statement code generation in OpenMP constructs.
/// Represents a region (kind ParallelOutlinedRegion) whose body is emitted
/// into a separate outlined helper function named \p HelperName; the global
/// thread id is made available through \p ThreadIDVar.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param CS          The captured statement of the region.
  /// \param ThreadIDVar Variable/parameter holding the global thread id;
  ///                    must be non-null (asserted below).
  /// \param CodeGen     Callback emitting the region body.
  /// \param Kind        Originating OpenMP directive kind.
  /// \param HasCancel   Whether the region may contain a 'cancel' construct.
  /// \param HelperName  Name used for the outlined helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: true only for parallel outlined regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the outlined helper function.
  StringRef HelperName;
};
147
148/// API for captured statement code generation in OpenMP constructs.
149class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
150public:
151 class UntiedTaskActionTy final : public PrePostActionTy {
152 bool Untied;
153 const VarDecl *PartIDVar;
154 const RegionCodeGenTy UntiedCodeGen;
155 llvm::SwitchInst *UntiedSwitch = nullptr;
156
157 public:
158 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
159 const RegionCodeGenTy &UntiedCodeGen)
160 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
161 void Enter(CodeGenFunction &CGF) override {
162 if (Untied) {
163 // Emit task switching point.
164 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
165 CGF.GetAddrOfLocalVar(PartIDVar),
166 PartIDVar->getType()->castAs<PointerType>());
167 llvm::Value *Res =
168 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
169 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
170 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
171 CGF.EmitBlock(DoneBB);
173 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
174 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
175 CGF.Builder.GetInsertBlock());
176 emitUntiedSwitch(CGF);
177 }
178 }
179 void emitUntiedSwitch(CodeGenFunction &CGF) const {
180 if (Untied) {
181 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
182 CGF.GetAddrOfLocalVar(PartIDVar),
183 PartIDVar->getType()->castAs<PointerType>());
184 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
185 PartIdLVal);
186 UntiedCodeGen(CGF);
187 CodeGenFunction::JumpDest CurPoint =
188 CGF.getJumpDestInCurrentScope(".untied.next.");
190 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
191 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
192 CGF.Builder.GetInsertBlock());
193 CGF.EmitBranchThroughCleanup(CurPoint);
194 CGF.EmitBlock(CurPoint.getBlock());
195 }
196 }
197 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
198 };
199 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
200 const VarDecl *ThreadIDVar,
201 const RegionCodeGenTy &CodeGen,
202 OpenMPDirectiveKind Kind, bool HasCancel,
203 const UntiedTaskActionTy &Action)
204 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
205 ThreadIDVar(ThreadIDVar), Action(Action) {
206 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
207 }
208
209 /// Get a variable or parameter for storing global thread id
210 /// inside OpenMP construct.
211 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
212
213 /// Get an LValue for the current ThreadID variable.
214 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
215
216 /// Get the name of the capture helper.
217 StringRef getHelperName() const override { return ".omp_outlined."; }
218
219 void emitUntiedSwitch(CodeGenFunction &CGF) override {
220 Action.emitUntiedSwitch(CGF);
221 }
222
223 static bool classof(const CGCapturedStmtInfo *Info) {
224 return CGOpenMPRegionInfo::classof(Info) &&
225 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
226 TaskOutlinedRegion;
227 }
228
229private:
230 /// A variable or parameter storing global thread id for OpenMP
231 /// constructs.
232 const VarDecl *ThreadIDVar;
233 /// Action for emitting code for untied tasks.
234 const UntiedTaskActionTy &Action;
235};
236
/// API for inlined captured statement code generation in OpenMP
/// constructs.
/// An inlined region is emitted directly into the enclosing function, so
/// every CGCapturedStmtInfo query is delegated to the enclosing region's
/// info (OuterRegionInfo) when one exists; queries that have no meaningful
/// answer without an outer region either return null or are unreachable.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI The enclosing function's captured-statement info; saved
  ///               so the RAII owner can restore it on destruction.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  // Forward the context parameter to the enclosing region's info.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// Field capturing 'this' in the enclosing region, if any.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE: deliberately consults the raw OldCSI (local shadows the
  /// OuterRegionInfo member) so that a non-OpenMP enclosing
  /// CGCapturedStmtInfo can still supply a helper name.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  // Untied-task switching is a no-op unless an outer region handles it.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The captured-statement info that was active before this region.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null if the enclosing
  /// captured-statement info is not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
319
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the
/// client has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param HelperName Unique, client-provided name for the target region.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI: true only for target regions.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the outlined target helper.
  StringRef HelperName;
};
348
/// Placeholder region-codegen callback that must never actually be invoked;
/// used for regions that wrap expressions rather than statements (see
/// CGOpenMPInnerExprInfo below, which passes it to its base constructor).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
352/// API for generation of expressions captured in a innermost OpenMP
353/// region.
354class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
355public:
356 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
357 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
358 OMPD_unknown,
359 /*HasCancel=*/false),
360 PrivScope(CGF) {
361 // Make sure the globals captured in the provided statement are local by
362 // using the privatization logic. We assume the same variable is not
363 // captured more than once.
364 for (const auto &C : CS.captures()) {
365 if (!C.capturesVariable() && !C.capturesVariableByCopy())
366 continue;
367
368 const VarDecl *VD = C.getCapturedVar();
369 if (VD->isLocalVarDeclOrParm())
370 continue;
371
372 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
373 /*RefersToEnclosingVariableOrCapture=*/false,
375 C.getLocation());
376 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
377 }
378 (void)PrivScope.Privatize();
379 }
380
381 /// Lookup the captured field decl for a variable.
382 const FieldDecl *lookup(const VarDecl *VD) const override {
383 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
384 return FD;
385 return nullptr;
386 }
387
388 /// Emit the captured statement body.
389 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
390 llvm_unreachable("No body for expressions");
391 }
392
393 /// Get a variable or parameter for storing global thread id
394 /// inside OpenMP construct.
395 const VarDecl *getThreadIDVariable() const override {
396 llvm_unreachable("No thread id for expressions");
397 }
398
399 /// Get the name of the capture helper.
400 StringRef getHelperName() const override {
401 llvm_unreachable("No helper name for expressions");
402 }
403
404 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
405
406private:
407 /// Private scope to capture global variables.
408 CodeGenFunction::OMPPrivateScope PrivScope;
409};
410
/// RAII for emitting code of OpenMP constructs.
/// On construction, installs a CGOpenMPInlinedRegionInfo as the active
/// CapturedStmtInfo of \p CGF (and optionally hides lambda/block capture
/// state); on destruction, deletes it and restores the previous state.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved lambda capture map, swapped back in on destruction.
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  // When true, lambda/block capture state is stashed away so the inlined
  // region does not see captures belonging to the enclosing function.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Stash lambda/block capture context; restored in the destructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
453
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  /// Note: shares the 0x40 value with OMP_IDENT_BARRIER_IMPL.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
482
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
523
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
/// Note the pattern visible below: each 'ordered' enumerator is its
/// unordered counterpart plus 32 (e.g. OMP_sch_static 34 -> OMP_ord_static
/// 66), and the monotonic/nonmonotonic modifiers are high bits OR'ed onto a
/// base schedule value.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
555
556/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
557/// region.
558class CleanupTy final : public EHScopeStack::Cleanup {
559 PrePostActionTy *Action;
560
561public:
562 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
563 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
564 if (!CGF.HaveInsertPoint())
565 return;
566 Action->Exit(CGF);
567 }
568};
569
570} // anonymous namespace
571
574 if (PrePostAction) {
575 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
576 Callback(CodeGen, CGF, *PrePostAction);
577 } else {
578 PrePostActionTy Action;
579 Callback(CodeGen, CGF, Action);
580 }
581}
582
583/// Check if the combiner is a call to UDR combiner and if it is so return the
584/// UDR decl used for reduction.
585static const OMPDeclareReductionDecl *
586getReductionInit(const Expr *ReductionOp) {
587 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
588 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
589 if (const auto *DRE =
590 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
591 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
592 return DRD;
593 return nullptr;
594}
595
597 const OMPDeclareReductionDecl *DRD,
598 const Expr *InitOp,
599 Address Private, Address Original,
600 QualType Ty) {
601 if (DRD->getInitializer()) {
602 std::pair<llvm::Function *, llvm::Function *> Reduction =
604 const auto *CE = cast<CallExpr>(InitOp);
605 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
606 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
607 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
608 const auto *LHSDRE =
609 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
610 const auto *RHSDRE =
611 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
612 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
613 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
614 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
615 (void)PrivateScope.Privatize();
617 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
618 CGF.EmitIgnoredExpr(InitOp);
619 } else {
620 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
621 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
622 auto *GV = new llvm::GlobalVariable(
623 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
624 llvm::GlobalValue::PrivateLinkage, Init, Name);
625 LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
626 RValue InitRVal;
627 switch (CGF.getEvaluationKind(Ty)) {
628 case TEK_Scalar:
629 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
630 break;
631 case TEK_Complex:
632 InitRVal =
634 break;
635 case TEK_Aggregate: {
636 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
637 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
638 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
639 /*IsInitializer=*/false);
640 return;
641 }
642 }
643 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
644 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
645 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
646 /*IsInitializer=*/false);
647 }
648}
649
650/// Emit initialization of arrays of complex types.
651/// \param DestAddr Address of the array.
652/// \param Type Type of array.
653/// \param Init Initial expression of array.
654/// \param SrcAddr Address of the original array.
656 QualType Type, bool EmitDeclareReductionInit,
657 const Expr *Init,
658 const OMPDeclareReductionDecl *DRD,
659 Address SrcAddr = Address::invalid()) {
660 // Perform element-by-element initialization.
661 QualType ElementTy;
662
663 // Drill down to the base element type on both arrays.
664 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
665 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
666 if (DRD)
667 SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
668
669 llvm::Value *SrcBegin = nullptr;
670 if (DRD)
671 SrcBegin = SrcAddr.emitRawPointer(CGF);
672 llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
673 // Cast from pointer to array type to pointer to single element.
674 llvm::Value *DestEnd =
675 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
676 // The basic structure here is a while-do loop.
677 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
678 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
679 llvm::Value *IsEmpty =
680 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
681 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
682
683 // Enter the loop body, making that address the current address.
684 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
685 CGF.EmitBlock(BodyBB);
686
687 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
688
689 llvm::PHINode *SrcElementPHI = nullptr;
690 Address SrcElementCurrent = Address::invalid();
691 if (DRD) {
692 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
693 "omp.arraycpy.srcElementPast");
694 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
695 SrcElementCurrent =
696 Address(SrcElementPHI, SrcAddr.getElementType(),
697 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
698 }
699 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
700 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
701 DestElementPHI->addIncoming(DestBegin, EntryBB);
702 Address DestElementCurrent =
703 Address(DestElementPHI, DestAddr.getElementType(),
704 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
705
706 // Emit copy.
707 {
708 CodeGenFunction::RunCleanupsScope InitScope(CGF);
709 if (EmitDeclareReductionInit) {
710 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
711 SrcElementCurrent, ElementTy);
712 } else
713 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
714 /*IsInitializer=*/false);
715 }
716
717 if (DRD) {
718 // Shift the address forward by one element.
719 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
720 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
721 "omp.arraycpy.dest.element");
722 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
723 }
724
725 // Shift the address forward by one element.
726 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
727 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
728 "omp.arraycpy.dest.element");
729 // Check whether we've reached the end.
730 llvm::Value *Done =
731 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
732 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
733 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
734
735 // Done.
736 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
737}
738
/// Emit an lvalue for a shared reduction item; thin wrapper that forwards
/// directly to CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
742
743LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
744 const Expr *E) {
745 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
746 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
747 return LValue();
748}
749
750void ReductionCodeGen::emitAggregateInitialization(
751 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
752 const OMPDeclareReductionDecl *DRD) {
753 // Emit VarDecl with copy init for arrays.
754 // Get the address of the original variable captured in current
755 // captured region.
756 const auto *PrivateVD =
757 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
758 bool EmitDeclareReductionInit =
759 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
760 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
761 EmitDeclareReductionInit,
762 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
763 : PrivateVD->getInit(),
764 DRD, SharedAddr);
765}
766
769 ArrayRef<const Expr *> Privates,
770 ArrayRef<const Expr *> ReductionOps) {
771 ClausesData.reserve(Shareds.size());
772 SharedAddresses.reserve(Shareds.size());
773 Sizes.reserve(Shareds.size());
774 BaseDecls.reserve(Shareds.size());
775 const auto *IOrig = Origs.begin();
776 const auto *IPriv = Privates.begin();
777 const auto *IRed = ReductionOps.begin();
778 for (const Expr *Ref : Shareds) {
779 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
780 std::advance(IOrig, 1);
781 std::advance(IPriv, 1);
782 std::advance(IRed, 1);
783 }
784}
785
787 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
788 "Number of generated lvalues must be exactly N.");
789 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
790 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
791 SharedAddresses.emplace_back(First, Second);
792 if (ClausesData[N].Shared == ClausesData[N].Ref) {
793 OrigAddresses.emplace_back(First, Second);
794 } else {
795 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
796 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
797 OrigAddresses.emplace_back(First, Second);
798 }
799}
800
802 QualType PrivateType = getPrivateType(N);
803 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
804 if (!PrivateType->isVariablyModifiedType()) {
805 Sizes.emplace_back(
806 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
807 nullptr);
808 return;
809 }
810 llvm::Value *Size;
811 llvm::Value *SizeInChars;
812 auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
813 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
814 if (AsArraySection) {
815 Size = CGF.Builder.CreatePtrDiff(ElemType,
816 OrigAddresses[N].second.getPointer(CGF),
817 OrigAddresses[N].first.getPointer(CGF));
818 Size = CGF.Builder.CreateNUWAdd(
819 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
820 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
821 } else {
822 SizeInChars =
823 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
824 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
825 }
826 Sizes.emplace_back(SizeInChars, Size);
828 CGF,
829 cast<OpaqueValueExpr>(
830 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
831 RValue::get(Size));
832 CGF.EmitVariablyModifiedType(PrivateType);
833}
834
836 llvm::Value *Size) {
837 QualType PrivateType = getPrivateType(N);
838 if (!PrivateType->isVariablyModifiedType()) {
839 assert(!Size && !Sizes[N].second &&
840 "Size should be nullptr for non-variably modified reduction "
841 "items.");
842 return;
843 }
845 CGF,
846 cast<OpaqueValueExpr>(
847 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
848 RValue::get(Size));
849 CGF.EmitVariablyModifiedType(PrivateType);
850}
851
// Emits initialization of the private reduction copy. Three cases:
// array-typed items get aggregate initialization, items with a user-defined
// reduction initializer use it, otherwise the private variable's own
// initializer is emitted (if non-trivial and DefaultInit did not handle it).
// NOTE(review): doc line 852 (signature start) was dropped by the extraction.
853     CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
854     llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
855   assert(SharedAddresses.size() > N && "No variable was generated");
856   const auto *PrivateVD =
857       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
858   const OMPDeclareReductionDecl *DRD =
859       getReductionInit(ClausesData[N].ReductionOp);
860   if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
861     if (DRD && DRD->getInitializer())
862       (void)DefaultInit(CGF);
863     emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
864   } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
865     (void)DefaultInit(CGF);
866     QualType SharedType = SharedAddresses[N].first.getType();
867     emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
868                                      PrivateAddr, SharedAddr, SharedType);
869   } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
870              !CGF.isTrivialInitializer(PrivateVD->getInit())) {
871     CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
872                          PrivateVD->getType().getQualifiers(),
873                          /*IsInitializer=*/false);
874   }
875 }
876
// Returns true iff the private copy's type requires a destructor call.
// NOTE(review): doc line 877 (signature, presumably
// "bool ReductionCodeGen::needCleanups(unsigned N) {") was dropped.
878   QualType PrivateType = getPrivateType(N);
879   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
880   return DTorKind != QualType::DK_none;
881 }
882
// Pushes a destroy cleanup for the private reduction copy when its type
// needs destruction; the address is retyped to the private type first.
// NOTE(review): doc line 883 (signature start) was dropped by the extraction.
884                                     Address PrivateAddr) {
885   QualType PrivateType = getPrivateType(N);
886   QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
887   if (needCleanups(N)) {
888     PrivateAddr =
889         PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
890     CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
891   }
892 }
893
// Dereferences through pointer/reference layers of BaseTy until it matches
// ElTy, loading at each step; returns an lvalue at the final address retyped
// to ElTy (preserving base info and TBAA).
// NOTE(review): doc line 894 (static function signature start) was dropped.
895                            LValue BaseLV) {
896   BaseTy = BaseTy.getNonReferenceType();
897   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
898          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
899     if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
900       BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
901     } else {
902       LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
903       BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
904     }
905     BaseTy = BaseTy->getPointeeType();
906   }
907   return CGF.MakeAddrLValue(
908       BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
909       BaseLV.getType(), BaseLV.getBaseInfo(),
910       CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
911 }
912
// Rebuilds a chain of temporaries mirroring BaseTy's pointer/reference
// nesting so that \p Addr can be stored at the innermost level; returns the
// outermost temporary, or the original base address re-pointed at Addr when
// no indirection levels exist.
// NOTE(review): the extraction dropped doc lines 913/915 (signature start and
// the declaration of Tmp, presumably "Address Tmp = Address::invalid();") and
// lines 931/937 (presumably pointer bitcast/addrspace-cast calls whose
// arguments appear on lines 932 and 938) — confirm against upstream.
914     Address OriginalBaseAddress, llvm::Value *Addr) {
916   Address TopTmp = Address::invalid();
917   Address MostTopTmp = Address::invalid();
918   BaseTy = BaseTy.getNonReferenceType();
919   while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
920          !CGF.getContext().hasSameType(BaseTy, ElTy)) {
921     Tmp = CGF.CreateMemTemp(BaseTy);
// Chain each new temporary into the previous level's storage.
922     if (TopTmp.isValid())
923       CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
924     else
925       MostTopTmp = Tmp;
926     TopTmp = Tmp;
927     BaseTy = BaseTy->getPointeeType();
928   }
929 
930   if (Tmp.isValid()) {
932         Addr, Tmp.getElementType());
933     CGF.Builder.CreateStore(Addr, Tmp);
934     return MostTopTmp;
935   }
936 
938       Addr, OriginalBaseAddress.getType());
939   return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
940 }
941
// Peels OpenMP array sections and array subscripts off \p Ref to find the
// underlying DeclRefExpr and its VarDecl. On success DE is set to that
// DeclRefExpr and the VarDecl is returned; otherwise returns nullptr (and DE
// is left unset — callers must check the return value before using DE).
942 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
943   const VarDecl *OrigVD = nullptr;
944   if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
// Array sections may nest sections and subscripts; strip both kinds.
945     const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
946     while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
947       Base = TempOASE->getBase()->IgnoreParenImpCasts();
948     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
949       Base = TempASE->getBase()->IgnoreParenImpCasts();
950     DE = cast<DeclRefExpr>(Base);
951     OrigVD = cast<VarDecl>(DE->getDecl());
952   } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
// Plain subscript chains only contain further subscripts.
953     const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
954     while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
955       Base = TempASE->getBase()->IgnoreParenImpCasts();
956     DE = cast<DeclRefExpr>(Base);
957     OrigVD = cast<VarDecl>(DE->getDecl());
958   }
959   return OrigVD;
960 }
961
// Adjusts the private address for array-section/subscript reduction items:
// computes the element offset of the shared item from its base and applies
// the same offset to the private buffer, then rebuilds the base indirection
// via castToBase. For plain variables the private address is returned as-is.
// NOTE(review): the extraction dropped doc line 962 (signature start) and
// line 976 (presumably a pointer bitcast/addrspace-cast call whose arguments
// appear on line 977) — confirm against upstream.
963                                                Address PrivateAddr) {
964   const DeclRefExpr *DE;
965   if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
966     BaseDecls.emplace_back(OrigVD);
967     LValue OriginalBaseLValue = CGF.EmitLValue(DE);
968     LValue BaseLValue =
969         loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
970                     OriginalBaseLValue);
971     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
// Element distance between the base and the shared section start.
972     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
973         SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
974         SharedAddr.emitRawPointer(CGF));
975     llvm::Value *PrivatePointer =
977             PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
978     llvm::Value *Ptr = CGF.Builder.CreateGEP(
979         SharedAddr.getElementType(), PrivatePointer, Adjustment);
980     return castToBase(CGF, OrigVD->getType(),
981                       SharedAddresses[N].first.getType(),
982                       OriginalBaseLValue.getAddress(CGF), Ptr);
983   }
984   BaseDecls.emplace_back(
985       cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
986   return PrivateAddr;
987 }
988
// Returns true iff item N's reduction uses a declare-reduction declaration
// that provides an explicit initializer.
// NOTE(review): doc line 989 (signature) was dropped by the extraction.
990   const OMPDeclareReductionDecl *DRD =
991       getReductionInit(ClausesData[N].ReductionOp);
992   return DRD && DRD->getInitializer();
993 }
994
// The thread-id variable is passed as a kmp_int32* argument in outlined
// parallel regions; load through that pointer to get the value lvalue.
995 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
996   return CGF.EmitLoadOfPointerLValue(
997       CGF.GetAddrOfLocalVar(getThreadIDVariable()),
998       getThreadIDVariable()->getType()->castAs<PointerType>());
999 }
1000
// Emits the structured block of an OpenMP region inside a terminate scope so
// exceptions cannot escape the single-entry/single-exit region.
// NOTE(review): doc line 1011 (the then-branch of "if (S)", presumably a
// statement-emission call) was dropped by the extraction.
1001 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1002   if (!CGF.HaveInsertPoint())
1003     return;
1004   // 1.2.2 OpenMP Language Terminology
1005   // Structured block - An executable statement with a single entry at the
1006   // top and a single exit at the bottom.
1007   // The point of exit cannot be a branch out of the structured block.
1008   // longjmp() and throw() must not violate the entry/exit criteria.
1009   CGF.EHStack.pushTerminate();
1010   if (S)
1012   CodeGen(CGF);
1013   CGF.EHStack.popTerminate();
1014 }
1015
// For task regions the thread id is a local value (not a pointer as in
// parallel regions), so an address lvalue is built directly.
// NOTE(review): doc line 1020 (final MakeAddrLValue argument, presumably an
// AlignmentSource) was dropped by the extraction.
1016 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1017     CodeGenFunction &CGF) {
1018   return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1019                             getThreadIDVariable()->getType(),
1021 }
1022
// Creates an implicit public, non-mutable, unnamed field of type FieldTy and
// appends it to DC; used to build synthesized record types for OpenMP.
// NOTE(review): doc line 1023 (signature start, presumably "static FieldDecl
// *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,") was dropped.
1024                                      QualType FieldTy) {
1025   auto *Field = FieldDecl::Create(
1026       C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1027       C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1028       /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1029   Field->setAccess(AS_public);
1030   DC->addDecl(Field);
1031   return Field;
1032 }
1033
// Constructor: configures and initializes the OpenMPIRBuilder from the
// language options, loads any existing offload metadata, and honors
// -fopenmp-force-usm by enabling unified shared memory in the config.
// NOTE(review): the extraction dropped doc lines 1034 (constructor name
// line), 1044 (true branch of the loadOffloadInfoMetadata ternary) and 1051
// (first statement of the OpenMPForceUSM block) — confirm against upstream.
1035     : CGM(CGM), OMPBuilder(CGM.getModule()) {
1036   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1037   llvm::OpenMPIRBuilderConfig Config(
1038       CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
1039       CGM.getLangOpts().OpenMPOffloadMandatory,
1040       /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
1041       hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
1042   OMPBuilder.initialize();
1043   OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
1045                                          : StringRef{});
1046   OMPBuilder.setConfig(Config);
1047 
1048   // The user forces the compiler to behave as if omp requires
1049   // unified_shared_memory was given.
1050   if (CGM.getLangOpts().OpenMPForceUSM) {
1052     OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
1053   }
1054 }
1055
// Resets per-module caches and erases unused non-target global variable
// declarations that were only referenced for debug info.
// NOTE(review): doc line 1056 (signature, presumably
// "void CGOpenMPRuntime::clear() {") was dropped by the extraction.
1057   InternalVars.clear();
1058   // Clean non-target variable declarations possibly used only in debug info.
1059   for (const auto &Data : EmittedNonTargetVariables) {
1060     if (!Data.getValue().pointsToAliveValue())
1061       continue;
1062     auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1063     if (!GV)
1064       continue;
// Only erase pure declarations that nothing references anymore.
1065     if (!GV->isDeclaration() || GV->getNumUses() > 0)
1066       continue;
1067     GV->eraseFromParent();
1068   }
1069 }
1070
// Delegates name mangling of runtime helper symbols to the OpenMPIRBuilder.
// NOTE(review): doc line 1071 (signature) was dropped by the extraction.
1072   return OMPBuilder.createPlatformSpecificName(Parts);
1073 }
1074
// Emits either the ".omp_combiner." or ".omp_initializer." helper for a
// declare-reduction: an internal function taking (Ty *in, Ty *out) that maps
// the declaration's omp_in/omp_out (or omp_orig/omp_priv) variables onto the
// parameters and emits the combiner/initializer expression.
// NOTE(review): doc line 1076 (signature continuation, presumably
// "emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,") was dropped.
1075 static llvm::Function *
1077                           const Expr *CombinerInitializer, const VarDecl *In,
1078                           const VarDecl *Out, bool IsCombiner) {
1079   // void .omp_combiner.(Ty *in, Ty *out);
1080   ASTContext &C = CGM.getContext();
1081   QualType PtrTy = C.getPointerType(Ty).withRestrict();
1082   FunctionArgList Args;
1083   ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1084                                /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1085   ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1086                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1087   Args.push_back(&OmpOutParm);
1088   Args.push_back(&OmpInParm);
1089   const CGFunctionInfo &FnInfo =
1090       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1091   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1092   std::string Name = CGM.getOpenMPRuntime().getName(
1093       {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1094   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1095                                     Name, &CGM.getModule());
1096   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// Under optimization, force-inline the helper into its callers.
1097   if (CGM.getLangOpts().Optimize) {
1098     Fn->removeFnAttr(llvm::Attribute::NoInline);
1099     Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1100     Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1101   }
1102   CodeGenFunction CGF(CGM);
1103   // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1104   // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1105   CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1106                     Out->getLocation());
1107   CodeGenFunction::OMPPrivateScope Scope(CGF);
1108   Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1109   Scope.addPrivate(
1110       In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1111               .getAddress(CGF));
1112   Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1113   Scope.addPrivate(
1114       Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1115                .getAddress(CGF));
1116   (void)Scope.Privatize();
// Initializers without an explicit init expression fall back to the priv
// variable's own (non-trivial) initializer.
1117   if (!IsCombiner && Out->hasInit() &&
1118       !CGF.isTrivialInitializer(Out->getInit())) {
1119     CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1120                          Out->getType().getQualifiers(),
1121                          /*IsInitializer=*/true);
1122   }
1123   if (CombinerInitializer)
1124     CGF.EmitIgnoredExpr(CombinerInitializer);
1125   Scope.ForceCleanup();
1126   CGF.FinishFunction();
1127   return Fn;
1128 }
1129
// Emits (once per declaration) the combiner and optional initializer
// functions for a declare-reduction and caches them in UDRMap; when emitted
// from within a function, records the declaration in FunctionUDRMap so it
// can be released in functionFinished().
// NOTE(review): the extraction dropped doc lines 1130-1131 (signature) and
// lines 1141/1143 (part of the initializer-emission call, including one
// branch of a conditional argument) — confirm against upstream.
1132   if (UDRMap.count(D) > 0)
1133     return;
1134   llvm::Function *Combiner = emitCombinerOrInitializer(
1135       CGM, D->getType(), D->getCombiner(),
1136       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1137       cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1138       /*IsCombiner=*/true);
1139   llvm::Function *Initializer = nullptr;
1140   if (const Expr *Init = D->getInitializer()) {
1142         CGM, D->getType(),
1144             : nullptr,
1145         cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1146         cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1147         /*IsCombiner=*/false);
1148   }
1149   UDRMap.try_emplace(D, Combiner, Initializer);
1150   if (CGF) {
1151     auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1152     Decls.second.push_back(D);
1153   }
1154 }
1155
// Returns the cached {combiner, initializer} pair for a declare-reduction,
// emitting it on demand if not yet in UDRMap.
// NOTE(review): doc line 1157 (function name line of the signature) was
// dropped by the extraction.
1156 std::pair<llvm::Function *, llvm::Function *>
1158   auto I = UDRMap.find(D);
1159   if (I != UDRMap.end())
1160     return I->second;
1161   emitUserDefinedReduction(/*CGF=*/nullptr, D);
1162   return UDRMap.lookup(D);
1163 }
1164
1165namespace {
1166// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1167// Builder if one is present.
// RAII helper: pushes a finalization callback onto the OpenMPIRBuilder's
// stack on construction and pops it on destruction; a null builder makes
// both operations no-ops. The callback routes cancellation through clang's
// cleanup machinery via EmitBranchThroughCleanup.
// NOTE(review): doc line 1192 (declaration of the jump destination used by
// the callback, presumably obtained from getOMPCancelDestination) was
// dropped by the extraction.
1168 struct PushAndPopStackRAII {
1169   PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1170                       bool HasCancel, llvm::omp::Directive Kind)
1171       : OMPBuilder(OMPBuilder) {
1172     if (!OMPBuilder)
1173       return;
1174 
1175     // The following callback is the crucial part of clangs cleanup process.
1176     //
1177     // NOTE:
1178     // Once the OpenMPIRBuilder is used to create parallel regions (and
1179     // similar), the cancellation destination (Dest below) is determined via
1180     // IP. That means if we have variables to finalize we split the block at IP,
1181     // use the new block (=BB) as destination to build a JumpDest (via
1182     // getJumpDestInCurrentScope(BB)) which then is fed to
1183     // EmitBranchThroughCleanup. Furthermore, there will not be the need
1184     // to push & pop an FinalizationInfo object.
1185     // The FiniCB will still be needed but at the point where the
1186     // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1187     auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1188       assert(IP.getBlock()->end() == IP.getPoint() &&
1189              "Clang CG should cause non-terminated block!");
1190       CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1191       CGF.Builder.restoreIP(IP);
1193           CGF.getOMPCancelDestination(OMPD_parallel);
1194       CGF.EmitBranchThroughCleanup(Dest);
1195     };
1196 
1197     // TODO: Remove this once we emit parallel regions through the
1198     // OpenMPIRBuilder as it can do this setup internally.
1199     llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1200     OMPBuilder->pushFinalizationCB(std::move(FI));
1201   }
1202   ~PushAndPopStackRAII() {
1203     if (OMPBuilder)
1204       OMPBuilder->popFinalizationCB();
1205   }
1206   llvm::OpenMPIRBuilder *OMPBuilder;
1207 };
1208} // namespace
1209
// Shared implementation for outlining 'parallel'/'teams' regions: determines
// whether any of the many parallel-like directive kinds carries a cancel
// clause, informs the OpenMPIRBuilder (for cancellation barriers), and
// generates the outlined function from the captured statement.
// NOTE(review): the extraction dropped doc line 1210 (signature start,
// presumably "static llvm::Function *emitParallelOrTeamsOutlinedFunction(")
// and line 1244 (the return statement, presumably returning
// CGF.GenerateCapturedStmtFunction(*CS)) — confirm against upstream.
1211     CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1212     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1213     const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1214   assert(ThreadIDVar->getType()->isPointerType() &&
1215          "thread id variable must be of type kmp_int32 *");
1216   CodeGenFunction CGF(CGM, true);
1217   bool HasCancel = false;
1218   if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1219     HasCancel = OPD->hasCancel();
1220   else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1221     HasCancel = OPD->hasCancel();
1222   else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1223     HasCancel = OPSD->hasCancel();
1224   else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1225     HasCancel = OPFD->hasCancel();
1226   else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1227     HasCancel = OPFD->hasCancel();
1228   else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1229     HasCancel = OPFD->hasCancel();
1230   else if (const auto *OPFD =
1231                dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1232     HasCancel = OPFD->hasCancel();
1233   else if (const auto *OPFD =
1234                dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1235     HasCancel = OPFD->hasCancel();
1236 
1237   // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1238   // parallel region to make cancellation barriers work properly.
1239   llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1240   PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1241   CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1242                                     HasCancel, OutlinedHelperName);
1243   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1245 }
1246
// Builds the outlined-helper symbol name: <Name> followed by the
// platform-specific ".omp_outlined"-style suffix from getName().
1247 std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1248   std::string Suffix = getName({"omp_outlined"});
1249   return (Name + Suffix).str();
1250 }
1251
// Convenience overload: derives the helper name from the current function.
// NOTE(review): doc line 1252 (signature) was dropped by the extraction.
1253   return getOutlinedHelperName(CGF.CurFn->getName());
1254 }
1255
// Builds the reduction-function symbol name from the given base name plus
// the platform-specific "omp.reduction.reduction_func" suffix.
1256 std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1257   std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1258   return (Name + Suffix).str();
1259 }
1260
// Outlines the captured 'parallel' region via the shared helper.
// NOTE(review): doc lines 1261-1262 (signature start) and 1266 (the call to
// the shared outlining helper whose arguments appear on line 1267) were
// dropped by the extraction.
1263     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1264     const RegionCodeGenTy &CodeGen) {
1265   const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1267       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1268       CodeGen);
1269 }
1270
// Outlines the captured 'teams' region via the shared helper.
// NOTE(review): doc lines 1271-1272 (signature start) and 1276 (the call to
// the shared outlining helper whose arguments appear on line 1277) were
// dropped by the extraction.
1273     const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1274     const RegionCodeGenTy &CodeGen) {
1275   const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1277       CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1278       CodeGen);
1279 }
1280
// Outlines a 'task'/'taskloop' region. For untied tasks installs an action
// that re-enqueues the task via __kmpc_omp_task; detects cancel clauses on
// the various task-like directive kinds; reports the number of task parts
// back through NumberOfParts for untied tasks.
// NOTE(review): doc line 1281 (signature start, presumably
// "llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(") was dropped
// by the extraction.
1282     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1283     const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1284     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1285     bool Tied, unsigned &NumberOfParts) {
// Re-schedules the remainder of an untied task via __kmpc_omp_task.
1286   auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1287                                               PrePostActionTy &) {
1288     llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1289     llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1290     llvm::Value *TaskArgs[] = {
1291         UpLoc, ThreadID,
1292         CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1293                                     TaskTVar->getType()->castAs<PointerType>())
1294             .getPointer(CGF)};
1295     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1296                             CGM.getModule(), OMPRTL___kmpc_omp_task),
1297                         TaskArgs);
1298   };
1299   CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1300                                                             UntiedCodeGen);
1301   CodeGen.setAction(Action);
1302   assert(!ThreadIDVar->getType()->isPointerType() &&
1303          "thread id variable must be of type kmp_int32 for tasks");
1304   const OpenMPDirectiveKind Region =
1305       isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1306                                                       : OMPD_task;
1307   const CapturedStmt *CS = D.getCapturedStmt(Region);
1308   bool HasCancel = false;
1309   if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1310     HasCancel = TD->hasCancel();
1311   else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1312     HasCancel = TD->hasCancel();
1313   else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1314     HasCancel = TD->hasCancel();
1315   else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1316     HasCancel = TD->hasCancel();
1317 
1318   CodeGenFunction CGF(CGM, true);
1319   CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1320                                         InnermostKind, HasCancel, Action);
1321   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1322   llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1323   if (!Tied)
1324     NumberOfParts = Action.getNumberOfParts();
1325   return Res;
1326 }
1327
1329 bool AtCurrentPoint) {
1330 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1331 assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1332
1333 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1334 if (AtCurrentPoint) {
1335 Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1336 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1337 } else {
1338 Elem.second.ServiceInsertPt =
1339 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1340 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1341 }
1342}
1343
// Removes the per-function service insertion point placeholder, if set.
// NOTE(review): doc line 1344 (signature) was dropped by the extraction.
1345   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1346   if (Elem.second.ServiceInsertPt) {
1347     llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
// Clear the map entry before erasing the anchor instruction.
1348     Elem.second.ServiceInsertPt = nullptr;
1349     Ptr->eraseFromParent();
1350   }
1351 }
1352
// Formats the ident string ";file;function;line;column;;" used by the
// OpenMP runtime's source-location records, writing into Buffer.
// NOTE(review): doc lines 1353 (signature start) and 1358 (presumably the
// PresumedLoc lookup that defines PLoc) were dropped by the extraction.
1354                                                  SourceLocation Loc,
1355                                                  SmallString<128> &Buffer) {
1356   llvm::raw_svector_ostream OS(Buffer);
1357   // Build debug location
1359   OS << ";" << PLoc.getFilename() << ";";
1360   if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1361     OS << FD->getQualifiedNameAsString();
1362   OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1363   return OS.str();
1364 }
1365
// Builds (or reuses) the ident_t* source-location value passed to OpenMP
// runtime calls. Without debug info (unless EmitLoc forces it) or with an
// invalid location a default string is used; otherwise function/file/line/
// column are encoded via the OpenMPIRBuilder.
// NOTE(review): doc lines 1366 (signature start) and 1379 (presumably the
// PresumedLoc lookup that defines PLoc) were dropped by the extraction.
1367                                                  SourceLocation Loc,
1368                                                  unsigned Flags, bool EmitLoc) {
1369   uint32_t SrcLocStrSize;
1370   llvm::Constant *SrcLocStr;
1371   if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1372                        llvm::codegenoptions::NoDebugInfo) ||
1373       Loc.isInvalid()) {
1374     SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1375   } else {
1376     std::string FunctionName;
1377     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1378       FunctionName = FD->getQualifiedNameAsString();
1380     const char *FileName = PLoc.getFilename();
1381     unsigned Line = PLoc.getLine();
1382     unsigned Column = PLoc.getColumn();
1383     SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1384                                                 Column, SrcLocStrSize);
1385   }
1386   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1387   return OMPBuilder.getOrCreateIdent(
1388       SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1389 }
1390
// Returns the OpenMP global thread id for the current function. Order of
// preference: delegate entirely to the OpenMPIRBuilder when enabled; reuse a
// cached value; load it from the region's thread-id parameter when safe
// w.r.t. exceptions; otherwise emit and cache a
// __kmpc_global_thread_num call at the service insertion point.
// NOTE(review): doc lines 1391 (signature start), 1448 (presumably the call
// that creates the service insertion point) and 1451 were dropped by the
// extraction — confirm against upstream.
1392                                           SourceLocation Loc) {
1393   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1394   // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1395   // the clang invariants used below might be broken.
1396   if (CGM.getLangOpts().OpenMPIRBuilder) {
1397     SmallString<128> Buffer;
1398     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1399     uint32_t SrcLocStrSize;
1400     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1401         getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1402     return OMPBuilder.getOrCreateThreadID(
1403         OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1404   }
1405 
1406   llvm::Value *ThreadID = nullptr;
1407   // Check whether we've already cached a load of the thread id in this
1408   // function.
1409   auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1410   if (I != OpenMPLocThreadIDMap.end()) {
1411     ThreadID = I->second.ThreadID;
1412     if (ThreadID != nullptr)
1413       return ThreadID;
1414   }
1415   // If exceptions are enabled, do not use parameter to avoid possible crash.
1416   if (auto *OMPRegionInfo =
1417           dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1418     if (OMPRegionInfo->getThreadIDVariable()) {
1419       // Check if this an outlined function with thread id passed as argument.
1420       LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1421       llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1422       if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1423           !CGF.getLangOpts().CXXExceptions ||
1424           CGF.Builder.GetInsertBlock() == TopBlock ||
1425           !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1426           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1427               TopBlock ||
1428           cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1429               CGF.Builder.GetInsertBlock()) {
1430         ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1431         // If value loaded in entry block, cache it and use it everywhere in
1432         // function.
1433         if (CGF.Builder.GetInsertBlock() == TopBlock) {
1434           auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1435           Elem.second.ThreadID = ThreadID;
1436         }
1437         return ThreadID;
1438       }
1439     }
1440   }
1441 
1442   // This is not an outlined function region - need to call __kmpc_int32
1443   // kmpc_global_thread_num(ident_t *loc).
1444   // Generate thread id value and cache this value for use across the
1445   // function.
1446   auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1447   if (!Elem.second.ServiceInsertPt)
1449   CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1450   CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1452   llvm::CallInst *Call = CGF.Builder.CreateCall(
1453       OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1454                                             OMPRTL___kmpc_global_thread_num),
1455       emitUpdateLocation(CGF, Loc));
1456   Call->setCallingConv(CGF.getRuntimeCC());
1457   Elem.second.ThreadID = Call;
1458   return Call;
1459 }
1460
// Per-function teardown: drops cached thread-id state and releases the
// user-defined reduction/mapper declarations recorded for this function.
// NOTE(review): doc lines 1461 (signature), 1464 and 1478-1479 (statements
// inside/after the cleanup blocks) were dropped by the extraction — confirm
// against upstream.
1462   assert(CGF.CurFn && "No function in current CodeGenFunction.");
1463   if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1465     OpenMPLocThreadIDMap.erase(CGF.CurFn);
1466   }
1467   if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1468     for(const auto *D : FunctionUDRMap[CGF.CurFn])
1469       UDRMap.erase(D);
1470     FunctionUDRMap.erase(CGF.CurFn);
1471   }
1472   auto I = FunctionUDMMap.find(CGF.CurFn);
1473   if (I != FunctionUDMMap.end()) {
1474     for(const auto *D : I->second)
1475       UDMMap.erase(D);
1476     FunctionUDMMap.erase(I);
1477   }
1480 }
1481
// Returns the ident_t* type maintained by the OpenMPIRBuilder.
// NOTE(review): doc line 1482 (signature) was dropped by the extraction.
1483   return OMPBuilder.IdentPtr;
1484 }
1485
// Lazily builds and returns the pointer type of the kmpc_micro outlined
// function: void (*)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...).
// NOTE(review): doc line 1486 (signature) was dropped by the extraction.
1487   if (!Kmpc_MicroTy) {
1488     // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1489     llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1490                                  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1491     Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1492   }
1493   return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1494 }
1495
// Maps a declaration's 'device_type' declare-target attribute (host/nohost/
// any) onto the OffloadEntriesInfoManager clause kind; returns None when no
// device type is attached.
// NOTE(review): doc line 1497 (function name line of the signature) was
// dropped by the extraction.
1496 llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1498   std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1499       OMPDeclareTargetDeclAttr::getDeviceType(VD);
1500   if (!DevTy)
1501     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1502 
1503   switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1504   case OMPDeclareTargetDeclAttr::DT_Host:
1505     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1506     break;
1507   case OMPDeclareTargetDeclAttr::DT_NoHost:
1508     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1509     break;
1510   case OMPDeclareTargetDeclAttr::DT_Any:
1511     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1512     break;
1513   default:
1514     return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1515     break;
1516   }
1517 }
1518
// Maps a declaration's declare-target map type (to/enter/link) onto the
// OffloadEntriesInfoManager entry kind; returns None when the declaration is
// not declare-target.
// NOTE(review): doc line 1520 (function name line of the signature) was
// dropped by the extraction.
1519 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1521   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1522       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1523   if (!MapType)
1524     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1525   switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1526   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1527     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1528     break;
1529   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1530     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1531     break;
1532   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1533     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1534     break;
1535   default:
1536     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1537     break;
1538   }
1539 }
1540
// Builds a TargetRegionEntryInfo for the given location: resolves the
// presumed file/line (retrying without #line directives when the file's
// unique id cannot be obtained) and hands the pair to the OpenMPIRBuilder.
// NOTE(review): doc line 1546 (presumably the SourceManager lookup that
// defines SM inside the callback) was dropped by the extraction.
1541 static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1542     CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1543     SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1544 
1545   auto FileInfoCallBack = [&]() {
1547     PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1548 
1549     llvm::sys::fs::UniqueID ID;
1550     if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
// Fall back to the physical location when the presumed file is bogus.
1551       PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1552     }
1553 
1554     return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1555   };
1556 
1557   return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1558 }
1559
// Returns the address of a declare-target variable through the
// OpenMPIRBuilder (handling link/to semantics there); invalid address if the
// builder produces none.
// NOTE(review): the extraction dropped several lines of this definition
// (doc lines 1560 signature, 1564, 1570, 1572-1573, 1575 — parts of the
// linkage lambda and of the getAddrOfDeclareTargetVar argument list) —
// confirm against the upstream file before editing.
1561   auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1562 
1563   auto LinkageForVariable = [&VD, this]() {
1565   };
1566 
1567   std::vector<llvm::GlobalVariable *> GeneratedRefs;
1568 
1569   llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1571   llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1574       VD->isExternallyVisible(),
1576           VD->getCanonicalDecl()->getBeginLoc()),
1577       CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1578       CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1579       LinkageForVariable);
1580 
1581   if (!addr)
1582     return ConstantAddress::invalid();
1583   return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1584 }
1585
// Lazily creates the per-variable "<mangled-name>.cache." internal global
// used by __kmpc_threadprivate_cached.
// NOTE(review): doc lines 1587 (function name line) and 1589 (second half of
// the assert condition) were dropped by the extraction.
1586 llvm::Constant *
1588   assert(!CGM.getLangOpts().OpenMPUseTLS ||
1590   // Lookup the entry, lazily creating it if necessary.
1591   std::string Suffix = getName({"cache", ""});
1592   return OMPBuilder.getOrCreateInternalVariable(
1593       CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1594 }
1595
// Returns the address of a threadprivate variable for the current thread.
// With native TLS the original address is used directly; otherwise the
// address is obtained from __kmpc_threadprivate_cached.
// NOTE(review): the extraction dropped doc lines 1596 (signature start),
// 1601 (second half of the TLS condition) and 1608-1609 (remaining runtime
// call arguments, presumably the size and the cache pointer) — confirm
// against upstream.
1597                                                  const VarDecl *VD,
1598                                                  Address VDAddr,
1599                                                  SourceLocation Loc) {
1600   if (CGM.getLangOpts().OpenMPUseTLS &&
1602     return VDAddr;
1603 
1604   llvm::Type *VarTy = VDAddr.getElementType();
1605   llvm::Value *Args[] = {
1606       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1607       CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1610   return Address(
1611       CGF.EmitRuntimeCall(
1612           OMPBuilder.getOrCreateRuntimeFunction(
1613               CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1614           Args),
1615       CGF.Int8Ty, VDAddr.getAlignment());
1616 }
1617
// Registers ctor/cctor/dtor for a threadprivate variable: first initializes
// the runtime via __kmpc_global_thread_num, then calls
// __kmpc_threadprivate_register.
// NOTE(review): doc line 1618 (signature start) was dropped by the
// extraction.
1619     CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1620     llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1621   // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1622   // library.
1623   llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1624   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1625                           CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1626                       OMPLoc);
1627   // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1628   // to register constructor/destructor for variable.
1629   llvm::Value *Args[] = {
1630       OMPLoc,
1631       CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1632       Ctor, CopyCtor, Dtor};
1633   CGF.EmitRuntimeCall(
1634       OMPBuilder.getOrCreateRuntimeFunction(
1635           CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1636       Args);
1637 }
1638
// Emits the registration of a threadprivate variable's definition: builds
// synthetic ctor/dtor helper functions when C++ initialization/destruction
// is needed, then registers them — either inside the provided CGF or inside
// a freshly created "__omp_threadprivate_init_" function that is returned to
// the caller. Returns nullptr when nothing needs registering (native TLS, no
// ctor and no dtor, or already processed).
// NOTE(review): the extraction dropped doc lines 1639 (signature start),
// 1643 (second half of the TLS condition), 1659/1662/1672/1679 (implicit
// param kind and location arguments of the ctor helper) and 1684/1691/1694
// (the dtor-needed condition and parts of the dtor helper) — confirm against
// the upstream file before editing.
1640     const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1641     bool PerformInit, CodeGenFunction *CGF) {
1642   if (CGM.getLangOpts().OpenMPUseTLS &&
1644     return nullptr;
1645 
1646   VD = VD->getDefinition(CGM.getContext());
// Process each threadprivate definition only once per module.
1647   if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1648     QualType ASTTy = VD->getType();
1649 
1650     llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1651     const Expr *Init = VD->getAnyInitializer();
1652     if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1653       // Generate function that re-emits the declaration's initializer into the
1654       // threadprivate copy of the variable VD
1655       CodeGenFunction CtorCGF(CGM);
1656       FunctionArgList Args;
1657       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1658                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1660       Args.push_back(&Dst);
1661 
1663           CGM.getContext().VoidPtrTy, Args);
1664       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1665       std::string Name = getName({"__kmpc_global_ctor_", ""});
1666       llvm::Function *Fn =
1667           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1668       CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1669                             Args, Loc, Loc);
1670       llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1671           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1673       Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1674                   VDAddr.getAlignment());
1675       CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1676                                /*IsInitializer=*/true);
1677       ArgVal = CtorCGF.EmitLoadOfScalar(
1678           CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1680       CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1681       CtorCGF.FinishFunction();
1682       Ctor = Fn;
1683     }
1685       // Generate function that emits destructor call for the threadprivate copy
1686       // of the variable VD
1687       CodeGenFunction DtorCGF(CGM);
1688       FunctionArgList Args;
1689       ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1690                             /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1692       Args.push_back(&Dst);
1693 
1695           CGM.getContext().VoidTy, Args);
1696       llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1697       std::string Name = getName({"__kmpc_global_dtor_", ""});
1698       llvm::Function *Fn =
1699           CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1700       auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1701       DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1702                             Loc, Loc);
1703       // Create a scope with an artificial location for the body of this function.
1704       auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1705       llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1706           DtorCGF.GetAddrOfLocalVar(&Dst),
1707           /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1708       DtorCGF.emitDestroy(
1709           Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1710           DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1711           DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1712       DtorCGF.FinishFunction();
1713       Dtor = Fn;
1714     }
1715     // Do not emit init function if it is not required.
1716     if (!Ctor && !Dtor)
1717       return nullptr;
1718 
1719     llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1720     auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1721                                                /*isVarArg=*/false)
1722                            ->getPointerTo();
1723     // Copying constructor for the threadprivate variable.
1724     // Must be NULL - reserved by runtime, but currently it requires that this
1725     // parameter is always NULL. Otherwise it fires assertion.
1726     CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1727     if (Ctor == nullptr) {
1728       auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1729                                              /*isVarArg=*/false)
1730                          ->getPointerTo();
1731       Ctor = llvm::Constant::getNullValue(CtorTy);
1732     }
1733     if (Dtor == nullptr) {
1734       auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1735                                              /*isVarArg=*/false)
1736                          ->getPointerTo();
1737       Dtor = llvm::Constant::getNullValue(DtorTy);
1738     }
// With no CodeGenFunction available, wrap the registration in a dedicated
// init function that the caller adds to the global init list.
1739     if (!CGF) {
1740       auto *InitFunctionTy =
1741           llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1742       std::string Name = getName({"__omp_threadprivate_init_", ""});
1743       llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1744           InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1745       CodeGenFunction InitCGF(CGM);
1746       FunctionArgList ArgList;
1747       InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1748                             CGM.getTypes().arrangeNullaryFunction(), ArgList,
1749                             Loc, Loc);
1750       emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1751       InitCGF.FinishFunction();
1752       return InitFunction;
1753     }
1754     emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1755   }
1756   return nullptr;
1757 }
1758
// Handle a function with an active 'declare target ... indirect' attribute:
// on the device, emit a new global holding the function's address and register
// it as an indirect device-global entry so the OpenMP runtime can resolve
// indirect calls to it without altering the function's own linkage/visibility.
 1760 llvm::GlobalValue *GV) {
 1761 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
 1762 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
 1763
 1764 // We only need to handle active 'indirect' declare target functions.
 1765 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
 1766 return;
 1767
 1768 // Get a mangled name to store the new device global in.
 1769 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
 1771 SmallString<128> Name;
 1772 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
 1773
 1774 // We need to generate a new global to hold the address of the indirectly
 1775 // called device function. Doing this allows us to keep the visibility and
 1776 // linkage of the associated function unchanged while allowing the runtime to
 1777 // access its value.
 1778 llvm::GlobalValue *Addr = GV;
 1779 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
// Device side: wrap the function address in a constant global; protected
// visibility keeps it resolvable by the runtime but not preemptible.
 1780 Addr = new llvm::GlobalVariable(
 1782 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
 1783 nullptr, llvm::GlobalValue::NotThreadLocal,
 1784 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
 1785 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
 1786 }
 1787
// Record the (possibly new) address as an indirect declare-target entry with
// weak_odr linkage so duplicate registrations across TUs merge.
 1788 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
 1790 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
 1791 llvm::GlobalValue::WeakODRLinkage);
 1792}
1793
// Return the address of a compiler-generated ("artificial") threadprivate
// variable named <Name>.artificial.. When TLS is usable (per LangOpts) the
// internal global itself is marked thread_local and returned directly;
// otherwise the address comes from a __kmpc_threadprivate_cached runtime call.
 1795 QualType VarType,
 1796 StringRef Name) {
 1797 std::string Suffix = getName({"artificial", ""});
 1798 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
 1799 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
 1800 VarLVType, Twine(Name).concat(Suffix).str());
 1801 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
// Fast path: native TLS — no runtime call needed.
 1803 GAddr->setThreadLocal(/*Val=*/true);
 1804 return Address(GAddr, GAddr->getValueType(),
 1806 }
// Slow path: ask the runtime for the per-thread copy, using a per-variable
// cache global named <Name>.artificial.cache..
 1807 std::string CacheSuffix = getName({"cache", ""});
 1808 llvm::Value *Args[] = {
 1812 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
 1813 /*isSigned=*/false),
 1814 OMPBuilder.getOrCreateInternalVariable(
 1816 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
 1817 return Address(
 1819 CGF.EmitRuntimeCall(
 1820 OMPBuilder.getOrCreateRuntimeFunction(
 1821 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
 1822 Args),
 1823 VarLVType->getPointerTo(/*AddrSpace=*/0)),
 1824 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
 1825}
1826
// Emit an if-clause guarded construct: when the condition constant-folds, emit
// only the live arm; otherwise emit an omp_if.then/omp_if.else diamond joined
// at omp_if.end, running ThenGen/ElseGen in the respective arms.
 1828 const RegionCodeGenTy &ThenGen,
 1829 const RegionCodeGenTy &ElseGen) {
 1830 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
 1831
 1832 // If the condition constant folds and can be elided, try to avoid emitting
 1833 // the condition and the dead arm of the if/else.
 1834 bool CondConstant;
 1835 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
 1836 if (CondConstant)
 1837 ThenGen(CGF);
 1838 else
 1839 ElseGen(CGF);
 1840 return;
 1841 }
 1842
 1843 // Otherwise, the condition did not fold, or we couldn't elide it. Just
 1844 // emit the conditional branch.
 1845 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
 1846 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
 1847 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
// TrueCount 0: no profile data for this synthesized branch.
 1848 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
 1849
 1850 // Emit the 'then' code.
 1851 CGF.EmitBlock(ThenBlock);
 1852 ThenGen(CGF);
 1853 CGF.EmitBranch(ContBlock);
 1854 // Emit the 'else' code if present.
 1855 // There is no need to emit line number for unconditional branch.
 1857 CGF.EmitBlock(ElseBlock);
 1858 ElseGen(CGF);
 1859 // There is no need to emit line number for unconditional branch.
 1861 CGF.EmitBranch(ContBlock);
 1862 // Emit the continuation block for code after the if.
 1863 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
 1864}
1865
// Emit a call that launches a 'parallel' region. With no if-clause (or a
// true one) the outlined function is forked via __kmpc_fork_call; under a
// false if-clause the body is executed serialized, bracketed by
// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
// NOTE(review): NumThreads is not referenced in this body — presumably the
// num_threads clause is emitted by the caller; confirm at the call sites.
 1867 llvm::Function *OutlinedFn,
 1868 ArrayRef<llvm::Value *> CapturedVars,
 1869 const Expr *IfCond,
 1870 llvm::Value *NumThreads) {
 1871 if (!CGF.HaveInsertPoint())
 1872 return;
 1873 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
 1874 auto &M = CGM.getModule();
 1875 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
 1876 this](CodeGenFunction &CGF, PrePostActionTy &) {
 1877 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
 1879 llvm::Value *Args[] = {
 1880 RTLoc,
 1881 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
 1882 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
 1884 RealArgs.append(std::begin(Args), std::end(Args));
 1885 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
 1886
 1887 llvm::FunctionCallee RTLFn =
 1888 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
 1889 CGF.EmitRuntimeCall(RTLFn, RealArgs);
 1890 };
 1891 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
 1892 this](CodeGenFunction &CGF, PrePostActionTy &) {
 1894 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
 1895 // Build calls:
 1896 // __kmpc_serialized_parallel(&Loc, GTid);
 1897 llvm::Value *Args[] = {RTLoc, ThreadID};
 1898 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 1899 M, OMPRTL___kmpc_serialized_parallel),
 1900 Args);
 1901
 1902 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
 1903 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
 1904 RawAddress ZeroAddrBound =
 1906 /*Name=*/".bound.zero.addr");
 1907 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
 1909 // ThreadId for serialized parallels is 0.
 1910 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
 1911 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
 1912 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
 1913
 1914 // Ensure we do not inline the function. This is trivially true for the ones
 1915 // passed to __kmpc_fork_call but the ones called in serialized regions
 1916 // could be inlined. This is not a perfect but it is closer to the invariant
 1917 // we want, namely, every data environment starts with a new function.
 1918 // TODO: We should pass the if condition to the runtime function and do the
 1919 // handling there. Much cleaner code.
 1920 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
 1921 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
 1922 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
 1923
 1924 // __kmpc_end_serialized_parallel(&Loc, GTid);
 1925 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
 1926 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 1927 M, OMPRTL___kmpc_end_serialized_parallel),
 1928 EndArgs);
 1929 };
// With an if-clause, select between fork and serialized execution at runtime;
// without one, fork unconditionally.
 1930 if (IfCond) {
 1931 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
 1932 } else {
 1933 RegionCodeGenTy ThenRCG(ThenGen);
 1934 ThenRCG(CGF);
 1935 }
 1936}
1937
 1938// If we're inside an (outlined) parallel region, use the region info's
 1939// thread-ID variable (it is passed in a first argument of the outlined function
 1940// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
 1941// regular serial code region, get thread ID by calling kmp_int32
 1942// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
 1943// return the address of that temp.
 1945 SourceLocation Loc) {
// Fast path: reuse the gtid argument already threaded into the outlined fn.
 1946 if (auto *OMPRegionInfo =
 1947 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
 1948 if (OMPRegionInfo->getThreadIDVariable())
 1949 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
 1950
// Serial code: materialize the thread ID into a local i32 temp so callers
// that need an address (not a value) have one.
 1951 llvm::Value *ThreadID = getThreadID(CGF, Loc);
 1952 QualType Int32Ty =
 1953 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
 1954 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
 1955 CGF.EmitStoreOfScalar(ThreadID,
 1956 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
 1957
 1958 return ThreadIDTemp;
 1959}
1960
1961llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1962 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1963 std::string Name = getName({Prefix, "var"});
1964 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1965}
1966
1967namespace {
1968/// Common pre(post)-action for different OpenMP constructs.
1969class CommonActionTy final : public PrePostActionTy {
1970 llvm::FunctionCallee EnterCallee;
1971 ArrayRef<llvm::Value *> EnterArgs;
1972 llvm::FunctionCallee ExitCallee;
1973 ArrayRef<llvm::Value *> ExitArgs;
1974 bool Conditional;
1975 llvm::BasicBlock *ContBlock = nullptr;
1976
1977public:
1978 CommonActionTy(llvm::FunctionCallee EnterCallee,
1979 ArrayRef<llvm::Value *> EnterArgs,
1980 llvm::FunctionCallee ExitCallee,
1981 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1982 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1983 ExitArgs(ExitArgs), Conditional(Conditional) {}
1984 void Enter(CodeGenFunction &CGF) override {
1985 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1986 if (Conditional) {
1987 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1988 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1989 ContBlock = CGF.createBasicBlock("omp_if.end");
1990 // Generate the branch (If-stmt)
1991 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1992 CGF.EmitBlock(ThenBlock);
1993 }
1994 }
1995 void Done(CodeGenFunction &CGF) {
1996 // Emit the rest of blocks/branches
1997 CGF.EmitBranch(ContBlock);
1998 CGF.EmitBlock(ContBlock, true);
1999 }
2000 void Exit(CodeGenFunction &CGF) override {
2001 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2002 }
2003};
2004} // anonymous namespace
2005
// Emit a 'critical' construct: enter via __kmpc_critical (or
// __kmpc_critical_with_hint when a hint expression is given), run the body,
// then leave via __kmpc_end_critical.
 2007 StringRef CriticalName,
 2008 const RegionCodeGenTy &CriticalOpGen,
 2009 SourceLocation Loc, const Expr *Hint) {
 2010 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
 2011 // CriticalOpGen();
 2012 // __kmpc_end_critical(ident_t *, gtid, Lock);
 2013 // Prepare arguments and build a call to __kmpc_critical
 2014 if (!CGF.HaveInsertPoint())
 2015 return;
 2016 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
 2017 getCriticalRegionLock(CriticalName)};
 2018 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
 2019 std::end(Args));
 2020 if (Hint) {
// The hint is appended only to the enter call; it is cast to unsigned i32.
 2021 EnterArgs.push_back(CGF.Builder.CreateIntCast(
 2022 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
 2023 }
// Exit uses the plain Args (no hint): __kmpc_end_critical has no hint param.
 2024 CommonActionTy Action(
 2025 OMPBuilder.getOrCreateRuntimeFunction(
 2026 CGM.getModule(),
 2027 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
 2028 EnterArgs,
 2029 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
 2030 OMPRTL___kmpc_end_critical),
 2031 Args);
 2032 CriticalOpGen.setAction(Action);
 2033 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
 2034}
2035
// Emit a 'master' construct: the body runs only on the thread for which
// __kmpc_master returns non-zero; __kmpc_end_master closes the region.
 2037 const RegionCodeGenTy &MasterOpGen,
 2038 SourceLocation Loc) {
 2039 if (!CGF.HaveInsertPoint())
 2040 return;
 2041 // if(__kmpc_master(ident_t *, gtid)) {
 2042 // MasterOpGen();
 2043 // __kmpc_end_master(ident_t *, gtid);
 2044 // }
 2045 // Prepare arguments and build a call to __kmpc_master
 2046 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
// Conditional=true: only the selected thread executes the guarded body.
 2047 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
 2048 CGM.getModule(), OMPRTL___kmpc_master),
 2049 Args,
 2050 OMPBuilder.getOrCreateRuntimeFunction(
 2051 CGM.getModule(), OMPRTL___kmpc_end_master),
 2052 Args,
 2053 /*Conditional=*/true);
 2054 MasterOpGen.setAction(Action);
 2055 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
// Done() closes the conditional branch structure opened by the action.
 2056 Action.Done(CGF);
 2057}
2058
// Emit a 'masked' construct: like 'master' but the executing thread is chosen
// by the filter expression (defaults to thread 0 when no filter is given).
 2060 const RegionCodeGenTy &MaskedOpGen,
 2061 SourceLocation Loc, const Expr *Filter) {
 2062 if (!CGF.HaveInsertPoint())
 2063 return;
 2064 // if(__kmpc_masked(ident_t *, gtid, filter)) {
 2065 // MaskedOpGen();
 2066 // __kmpc_end_masked(iden_t *, gtid);
 2067 // }
 2068 // Prepare arguments and build a call to __kmpc_masked
 2069 llvm::Value *FilterVal = Filter
 2070 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
 2071 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
 2072 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
 2073 FilterVal};
// The end call takes no filter argument, hence the separate ArgsEnd array.
 2074 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
 2075 getThreadID(CGF, Loc)};
 2076 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
 2077 CGM.getModule(), OMPRTL___kmpc_masked),
 2078 Args,
 2079 OMPBuilder.getOrCreateRuntimeFunction(
 2080 CGM.getModule(), OMPRTL___kmpc_end_masked),
 2081 ArgsEnd,
 2082 /*Conditional=*/true);
 2083 MaskedOpGen.setAction(Action);
 2084 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
 2085 Action.Done(CGF);
 2086}
2087
// Emit a 'taskyield' point, delegating to the OMPIRBuilder when enabled,
// otherwise calling __kmpc_omp_taskyield directly.
 2089 SourceLocation Loc) {
 2090 if (!CGF.HaveInsertPoint())
 2091 return;
 2092 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
 2093 OMPBuilder.createTaskyield(CGF.Builder);
 2094 } else {
 2095 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
 2096 llvm::Value *Args[] = {
 2097 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
 2098 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
 2099 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2100 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
 2101 Args);
 2102 }
 2103
// Inside an untied task, a yield is a resume point: emit the switch update.
 2104 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
 2105 Region->emitUntiedSwitch(CGF);
 2106}
2107
// Emit a 'taskgroup' construct: bracket the body with __kmpc_taskgroup /
// __kmpc_end_taskgroup (unconditional — every thread runs the body).
 2109 const RegionCodeGenTy &TaskgroupOpGen,
 2110 SourceLocation Loc) {
 2111 if (!CGF.HaveInsertPoint())
 2112 return;
 2113 // __kmpc_taskgroup(ident_t *, gtid);
 2114 // TaskgroupOpGen();
 2115 // __kmpc_end_taskgroup(ident_t *, gtid);
 2116 // Prepare arguments and build a call to __kmpc_taskgroup
 2117 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
 2118 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
 2119 CGM.getModule(), OMPRTL___kmpc_taskgroup),
 2120 Args,
 2121 OMPBuilder.getOrCreateRuntimeFunction(
 2122 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
 2123 Args);
 2124 TaskgroupOpGen.setAction(Action);
 2125 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
 2126}
2127
 2128/// Given an array of pointers to variables, project the address of a
 2129/// given variable.
 2131 unsigned Index, const VarDecl *Var) {
 2132 // Pull out the pointer to the variable.
 2133 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
 2134 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
 2135
// Cast the loaded void*-like pointer to the variable's memory type,
// preserving the pointer's address space, and attach the decl's alignment.
 2136 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
 2137 return Address(
 2138 CGF.Builder.CreateBitCast(
 2139 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
 2140 ElemTy, CGF.getContext().getDeclAlign(Var));
 2141}
2142
// Build the copy helper passed to __kmpc_copyprivate:
//   void omp.copyprivate.copy_func(void *LHSArg, void *RHSArg)
// Both arguments are arrays of void* (one slot per copyprivate variable);
// the helper performs the per-variable copy/assignment for each slot.
 2144 CodeGenModule &CGM, llvm::Type *ArgsElemType,
 2145 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
 2146 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
 2147 SourceLocation Loc) {
 2148 ASTContext &C = CGM.getContext();
 2149 // void copy_func(void *LHSArg, void *RHSArg);
 2150 FunctionArgList Args;
 2151 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
 2153 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
 2155 Args.push_back(&LHSArg);
 2156 Args.push_back(&RHSArg);
 2157 const auto &CGFI =
 2158 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
 2159 std::string Name =
 2160 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
 2161 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
 2162 llvm::GlobalValue::InternalLinkage, Name,
 2163 &CGM.getModule());
 2164 Fn->setDoesNotRecurse();
 2165 CodeGenFunction CGF(CGM);
 2167 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
 2168 // Dest = (void*[n])(LHSArg);
 2169 // Src = (void*[n])(RHSArg);
 2171 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
 2172 ArgsElemType->getPointerTo()),
 2173 ArgsElemType, CGF.getPointerAlign());
 2175 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
 2176 ArgsElemType->getPointerTo()),
 2177 ArgsElemType, CGF.getPointerAlign());
 2178 // *(Type0*)Dst[0] = *(Type0*)Src[0];
 2179 // *(Type1*)Dst[1] = *(Type1*)Src[1];
 2180 // ...
 2181 // *(Typen*)Dst[n] = *(Typen*)Src[n];
 2182 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
 2183 const auto *DestVar =
 2184 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
 2185 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
 2186
 2187 const auto *SrcVar =
 2188 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
 2189 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
 2190
// EmitOMPCopy applies the user-visible assignment semantics (including any
// user-defined assignment operator captured in AssignmentOps[I]).
 2191 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
 2192 QualType Type = VD->getType();
 2193 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
 2194 }
 2195 CGF.FinishFunction();
 2196 return Fn;
 2197}
2198
// Emit a 'single' construct. The body runs on the one thread for which
// __kmpc_single returns non-zero; if copyprivate variables are present, a
// did_it flag records which thread executed the body and __kmpc_copyprivate
// broadcasts the listed variables to the other threads afterwards.
 2200 const RegionCodeGenTy &SingleOpGen,
 2201 SourceLocation Loc,
 2202 ArrayRef<const Expr *> CopyprivateVars,
 2203 ArrayRef<const Expr *> SrcExprs,
 2204 ArrayRef<const Expr *> DstExprs,
 2205 ArrayRef<const Expr *> AssignmentOps) {
 2206 if (!CGF.HaveInsertPoint())
 2207 return;
 2208 assert(CopyprivateVars.size() == SrcExprs.size() &&
 2209 CopyprivateVars.size() == DstExprs.size() &&
 2210 CopyprivateVars.size() == AssignmentOps.size());
 2212 // int32 did_it = 0;
 2213 // if(__kmpc_single(ident_t *, gtid)) {
 2214 // SingleOpGen();
 2215 // __kmpc_end_single(ident_t *, gtid);
 2216 // did_it = 1;
 2217 // }
 2218 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
 2219 // <copy_func>, did_it);
 2220
 2221 Address DidIt = Address::invalid();
 2222 if (!CopyprivateVars.empty()) {
 2223 // int32 did_it = 0;
 2224 QualType KmpInt32Ty =
 2225 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
 2226 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
 2227 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
 2228 }
 2229 // Prepare arguments and build a call to __kmpc_single
 2230 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
 2231 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
 2232 CGM.getModule(), OMPRTL___kmpc_single),
 2233 Args,
 2234 OMPBuilder.getOrCreateRuntimeFunction(
 2235 CGM.getModule(), OMPRTL___kmpc_end_single),
 2236 Args,
 2237 /*Conditional=*/true);
 2238 SingleOpGen.setAction(Action);
 2239 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
 2240 if (DidIt.isValid()) {
// Still inside the guarded arm: only the executing thread sets did_it.
 2241 // did_it = 1;
 2242 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
 2243 }
 2244 Action.Done(CGF);
 2245 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
 2246 // <copy_func>, did_it);
 2247 if (DidIt.isValid()) {
// Build a void*[n] array of the copyprivate variables' addresses, then hand
// it plus the generated copy helper to the runtime for broadcasting.
 2248 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
 2249 QualType CopyprivateArrayTy = C.getConstantArrayType(
 2250 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
 2251 /*IndexTypeQuals=*/0);
 2252 // Create a list of all private variables for copyprivate.
 2253 Address CopyprivateList =
 2254 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
 2255 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
 2256 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
 2257 CGF.Builder.CreateStore(
 2259 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
 2260 CGF.VoidPtrTy),
 2261 Elem);
 2262 }
 2263 // Build function that copies private values from single region to all other
 2264 // threads in the corresponding parallel region.
 2265 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
 2266 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
 2267 SrcExprs, DstExprs, AssignmentOps, Loc);
 2268 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
 2270 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
 2271 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
 2272 llvm::Value *Args[] = {
 2273 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
 2274 getThreadID(CGF, Loc), // i32 <gtid>
 2275 BufSize, // size_t <buf_size>
 2276 CL.emitRawPointer(CGF), // void *<copyprivate list>
 2277 CpyFn, // void (*) (void *, void *) <copy_func>
 2278 DidItVal // i32 did_it
 2279 };
 2280 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2281 CGM.getModule(), OMPRTL___kmpc_copyprivate),
 2282 Args);
 2283 }
 2284}
2285
// Emit an 'ordered' construct. With the threads semantics it is bracketed by
// __kmpc_ordered / __kmpc_end_ordered; otherwise (e.g. 'ordered simd') the
// body is emitted inline without runtime calls.
 2287 const RegionCodeGenTy &OrderedOpGen,
 2288 SourceLocation Loc, bool IsThreads) {
 2289 if (!CGF.HaveInsertPoint())
 2290 return;
 2291 // __kmpc_ordered(ident_t *, gtid);
 2292 // OrderedOpGen();
 2293 // __kmpc_end_ordered(ident_t *, gtid);
 2294 // Prepare arguments and build a call to __kmpc_ordered
 2295 if (IsThreads) {
 2296 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
 2297 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
 2298 CGM.getModule(), OMPRTL___kmpc_ordered),
 2299 Args,
 2300 OMPBuilder.getOrCreateRuntimeFunction(
 2301 CGM.getModule(), OMPRTL___kmpc_end_ordered),
 2302 Args);
 2303 OrderedOpGen.setAction(Action);
 2304 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
 2305 return;
 2306 }
 2307 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
 2308}
2309
// Map a directive kind to the ident_t flag describing which kind of barrier
// is being emitted (implicit per-worksharing-construct, explicit, or generic
// implicit).
 2311 unsigned Flags;
 2312 if (Kind == OMPD_for)
 2313 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
 2314 else if (Kind == OMPD_sections)
 2315 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
 2316 else if (Kind == OMPD_single)
 2317 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
 2318 else if (Kind == OMPD_barrier)
 2319 Flags = OMP_IDENT_BARRIER_EXPL;
 2320 else
 2321 Flags = OMP_IDENT_BARRIER_IMPL;
 2322 return Flags;
 2323}
2324
// Choose the default schedule for a loop directive. For doacross loops
// (an 'ordered' clause with a parameter) force schedule(static, 1); otherwise
// leave ScheduleKind/ChunkExpr untouched.
 2326 CodeGenFunction &CGF, const OMPLoopDirective &S,
 2327 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
 2328 // Check if the loop directive is actually a doacross loop directive. In this
 2329 // case choose static, 1 schedule.
 2330 if (llvm::any_of(
 2331 S.getClausesOfKind<OMPOrderedClause>(),
 2332 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
 2333 ScheduleKind = OMPC_SCHEDULE_static;
 2334 // Chunk size is 1 in this case.
 2335 llvm::APInt ChunkSize(32, 1);
// Synthesize an unsigned 32-bit literal '1' as the chunk expression.
 2336 ChunkExpr = IntegerLiteral::Create(
 2337 CGF.getContext(), ChunkSize,
 2338 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
 2339 SourceLocation());
 2340 }
 2341}
2342
// Emit a barrier. Delegates to the OMPIRBuilder when enabled. Otherwise,
// inside a cancellable region (unless ForceSimpleCall) it emits
// __kmpc_cancel_barrier and, when EmitChecks is set, branches out of the
// construct if cancellation was observed; in all other cases it emits a plain
// __kmpc_barrier.
 2344 OpenMPDirectiveKind Kind, bool EmitChecks,
 2345 bool ForceSimpleCall) {
 2346 // Check if we should use the OMPBuilder
 2347 auto *OMPRegionInfo =
 2348 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
 2349 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
 2350 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
 2351 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
 2352 return;
 2353 }
 2354
 2355 if (!CGF.HaveInsertPoint())
 2356 return;
 2357 // Build call __kmpc_cancel_barrier(loc, thread_id);
 2358 // Build call __kmpc_barrier(loc, thread_id);
 2359 unsigned Flags = getDefaultFlagsForBarriers(Kind);
 2360 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
 2361 // thread_id);
 2362 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
 2363 getThreadID(CGF, Loc)};
 2364 if (OMPRegionInfo) {
 2365 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
 2366 llvm::Value *Result = CGF.EmitRuntimeCall(
 2367 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
 2368 OMPRTL___kmpc_cancel_barrier),
 2369 Args);
 2370 if (EmitChecks) {
 2371 // if (__kmpc_cancel_barrier()) {
 2372 // exit from construct;
 2373 // }
 2374 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
 2375 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
 2376 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
 2377 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
 2378 CGF.EmitBlock(ExitBB);
 2379 // exit from construct;
// Branch through cleanups to the construct's cancellation destination.
 2380 CodeGenFunction::JumpDest CancelDestination =
 2381 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
 2382 CGF.EmitBranchThroughCleanup(CancelDestination);
 2383 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
 2384 }
 2385 return;
 2386 }
 2387 }
 2388 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2389 CGM.getModule(), OMPRTL___kmpc_barrier),
 2390 Args);
 2391}
2392
// Emit a call to __kmpc_error for the 'error' directive. ME is the optional
// message string (null pointer is passed when absent); severity is 2 for
// fatal, 1 for warning.
 2393 Expr *ME, bool IsFatal) {
 2394 llvm::Value *MVL =
 2395 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
 2396 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
 2397 // Build call void __kmpc_error(ident_t *loc, int severity, const char
 2398 // *message)
 2399 llvm::Value *Args[] = {
 2400 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
 2401 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
 2402 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
 2403 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2404 CGM.getModule(), OMPRTL___kmpc_error),
 2405 Args);
 2406}
2408
 2409/// Map the OpenMP loop schedule to the runtime enumeration.
// Chunked selects the *_chunked variant; Ordered selects the OMP_ord_*
// (ordered) family instead of OMP_sch_*. Unknown kinds fall back to static
// non-chunked.
 2410static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
 2411 bool Chunked, bool Ordered) {
 2412 switch (ScheduleKind) {
 2413 case OMPC_SCHEDULE_static:
 2414 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
 2415 : (Ordered ? OMP_ord_static : OMP_sch_static);
 2416 case OMPC_SCHEDULE_dynamic:
 2417 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
 2418 case OMPC_SCHEDULE_guided:
 2419 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
 2420 case OMPC_SCHEDULE_runtime:
 2421 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
 2422 case OMPC_SCHEDULE_auto:
 2423 return Ordered ? OMP_ord_auto : OMP_sch_auto;
 2425 assert(!Chunked && "chunk was specified but schedule kind not known");
 2426 return Ordered ? OMP_ord_static : OMP_sch_static;
 2427 }
 2428 llvm_unreachable("Unexpected runtime schedule");
 2429}
2430
 2431/// Map the OpenMP distribute schedule to the runtime enumeration.
 2432static OpenMPSchedType
 2434 // only static is allowed for dist_schedule
 2435 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
 2436}
2437
// True iff the loop schedule maps to plain static (no chunk, not ordered).
 2439 bool Chunked) const {
 2440 OpenMPSchedType Schedule =
 2441 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
 2442 return Schedule == OMP_sch_static;
 2443}
2444
// True iff the distribute schedule maps to plain static (no chunk).
 2446 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
 2447 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
 2448 return Schedule == OMP_dist_sch_static;
 2449}
2450
// True iff the loop schedule maps to static with a chunk (not ordered).
 2452 bool Chunked) const {
 2453 OpenMPSchedType Schedule =
 2454 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
 2455 return Schedule == OMP_sch_static_chunked;
 2456}
2457
// True iff the distribute schedule maps to static with a chunk.
 2459 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
 2460 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
 2461 return Schedule == OMP_dist_sch_static_chunked;
 2462}
2463
// True iff the schedule requires the dynamic dispatch runtime path, i.e.
// anything other than plain static (evaluated unchunked/unordered).
 2465 OpenMPSchedType Schedule =
 2466 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
 2467 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
 2468 return Schedule != OMP_sch_static;
2470
// Combine the runtime schedule value with monotonic/nonmonotonic modifier
// bits from the schedule clause modifiers M1/M2; the 'simd' modifier upgrades
// static_chunked to static_balanced_chunked. Returns Schedule | Modifier.
 2471static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
 2474 int Modifier = 0;
 2475 switch (M1) {
 2476 case OMPC_SCHEDULE_MODIFIER_monotonic:
 2477 Modifier = OMP_sch_modifier_monotonic;
 2478 break;
 2479 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
 2480 Modifier = OMP_sch_modifier_nonmonotonic;
 2481 break;
 2482 case OMPC_SCHEDULE_MODIFIER_simd:
 2483 if (Schedule == OMP_sch_static_chunked)
 2484 Schedule = OMP_sch_static_balanced_chunked;
 2485 break;
 2488 break;
 2489 }
// M2 is handled identically; if both modifiers set monotonicity, M2 wins.
 2490 switch (M2) {
 2491 case OMPC_SCHEDULE_MODIFIER_monotonic:
 2492 Modifier = OMP_sch_modifier_monotonic;
 2493 break;
 2494 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
 2495 Modifier = OMP_sch_modifier_nonmonotonic;
 2496 break;
 2497 case OMPC_SCHEDULE_MODIFIER_simd:
 2498 if (Schedule == OMP_sch_static_chunked)
 2499 Schedule = OMP_sch_static_balanced_chunked;
 2500 break;
 2503 break;
 2504 }
 2505 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
 2506 // If the static schedule kind is specified or if the ordered clause is
 2507 // specified, and if the nonmonotonic modifier is not specified, the effect is
 2508 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
 2509 // modifier is specified, the effect is as if the nonmonotonic modifier is
 2510 // specified.
 2511 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
 2512 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
 2513 Schedule == OMP_sch_static_balanced_chunked ||
 2514 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
 2515 Schedule == OMP_dist_sch_static_chunked ||
 2516 Schedule == OMP_dist_sch_static))
 2517 Modifier = OMP_sch_modifier_nonmonotonic;
 2518 }
 2519 return Schedule | Modifier;
 2520}
2521
// Emit __kmpc_dispatch_init_* for dynamically-scheduled (or ordered)
// worksharing loops. Static schedules must not reach here unless Ordered.
 2524 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
 2525 bool Ordered, const DispatchRTInput &DispatchValues) {
 2526 if (!CGF.HaveInsertPoint())
 2527 return;
 2528 OpenMPSchedType Schedule = getRuntimeSchedule(
 2529 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
 2530 assert(Ordered ||
 2531 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
 2532 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
 2533 Schedule != OMP_sch_static_balanced_chunked));
 2534 // Call __kmpc_dispatch_init(
 2535 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
 2536 // kmp_int[32|64] lower, kmp_int[32|64] upper,
 2537 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
 2538
 2539 // If the Chunk was not specified in the clause - use default value 1.
 2540 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
 2541 : CGF.Builder.getIntN(IVSize, 1);
 2542 llvm::Value *Args[] = {
 2543 emitUpdateLocation(CGF, Loc),
 2544 getThreadID(CGF, Loc),
 2545 CGF.Builder.getInt32(addMonoNonMonoModifier(
 2546 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
 2547 DispatchValues.LB, // Lower
 2548 DispatchValues.UB, // Upper
 2549 CGF.Builder.getIntN(IVSize, 1), // Stride
 2550 Chunk // Chunk
 2551 };
// The IV size/signedness pick the 4/4u/8/8u variant of the entry point.
 2552 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
 2553 Args);
 2554}
2555
// Shared helper that emits the __kmpc_for_static_init_* call for statically
// scheduled loop/sections/distribute constructs. Only the OMP_*static*
// schedule values are legal here (asserted below).
 2557 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
 2558 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
 2560 const CGOpenMPRuntime::StaticRTInput &Values) {
 2561 if (!CGF.HaveInsertPoint())
 2562 return;
 2563
 2564 assert(!Values.Ordered);
 2565 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
 2566 Schedule == OMP_sch_static_balanced_chunked ||
 2567 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
 2568 Schedule == OMP_dist_sch_static ||
 2569 Schedule == OMP_dist_sch_static_chunked);
 2570
 2571 // Call __kmpc_for_static_init(
 2572 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
 2573 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
 2574 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
 2575 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
 2576 llvm::Value *Chunk = Values.Chunk;
 2577 if (Chunk == nullptr) {
 2578 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
 2579 Schedule == OMP_dist_sch_static) &&
 2580 "expected static non-chunked schedule");
 2581 // If the Chunk was not specified in the clause - use default value 1.
 2582 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
 2583 } else {
 2584 assert((Schedule == OMP_sch_static_chunked ||
 2585 Schedule == OMP_sch_static_balanced_chunked ||
 2586 Schedule == OMP_ord_static_chunked ||
 2587 Schedule == OMP_dist_sch_static_chunked) &&
 2588 "expected static chunked schedule");
 2589 }
// The bounds/stride/last-iter slots are passed by address; the runtime
// updates them in place with this thread's portion of the iteration space.
 2590 llvm::Value *Args[] = {
 2591 UpdateLocation,
 2592 ThreadId,
 2593 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
 2594 M2)), // Schedule type
 2595 Values.IL.emitRawPointer(CGF), // &isLastIter
 2596 Values.LB.emitRawPointer(CGF), // &LB
 2597 Values.UB.emitRawPointer(CGF), // &UB
 2598 Values.ST.emitRawPointer(CGF), // &Stride
 2599 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
 2600 Chunk // Chunk
 2601 };
 2602 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
 2603}
2604
// Emits static initialization for a worksharing loop or sections construct:
// maps the clause schedule onto a runtime schedule kind, emits the source
// location with the proper WORK_LOOP/WORK_SECTIONS ident flag, and delegates
// the actual __kmpc_for_static_init call to emitForStaticInitCall. (NOTE: the
// function-name line of the signature is outside this extracted view.)
2606                                         SourceLocation Loc,
2607                                         OpenMPDirectiveKind DKind,
2608                                         const OpenMPScheduleTy &ScheduleKind,
2609                                         const StaticRTInput &Values) {
2610   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2611       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2612   assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2613          "Expected loop-based or sections-based directive.");
// The ident flag distinguishes loop vs. sections regions for the runtime;
// the selecting condition sits on a line stripped from this view.
2614   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2616                                              ? OMP_IDENT_WORK_LOOP
2617                                              : OMP_IDENT_WORK_SECTIONS);
2618   llvm::Value *ThreadId = getThreadID(CGF, Loc);
// Non-GPU-distribute variant of the init function (last argument false).
2619   llvm::FunctionCallee StaticInitFunction =
2620       OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2621                                              false);
2623   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2624                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2625 }
2626
// Emits static initialization for a 'distribute' construct. Chooses the GPU
// distribute flavor of the init function when compiling for an AMDGCN/NVPTX
// target device; schedule modifiers are not applicable here and are passed as
// unknown. (NOTE: the leading signature lines are outside this extracted view.)
2630     const CGOpenMPRuntime::StaticRTInput &Values) {
2631   OpenMPSchedType ScheduleNum =
2632       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2633   llvm::Value *UpdatedLocation =
2634       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2635   llvm::Value *ThreadId = getThreadID(CGF, Loc);
2636   llvm::FunctionCallee StaticInitFunction;
// GPU offload devices get the dedicated distribute init entry point.
2637   bool isGPUDistribute =
2638       CGM.getLangOpts().OpenMPIsTargetDevice &&
2639       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2640   StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2641       Values.IVSize, Values.IVSigned, isGPUDistribute);
2642 
2643   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2644                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2646 }
2647
// Emits the runtime call that finalizes a statically scheduled region:
// __kmpc_for_static_fini, or __kmpc_distribute_static_fini for a distribute
// construct on a GPU target device. (NOTE: the function-name line of the
// signature is outside this extracted view.)
2649                                           SourceLocation Loc,
2650                                           OpenMPDirectiveKind DKind) {
2651   assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2652           DKind == OMPD_sections) &&
2653          "Expected distribute, for, or sections directive kind");
2654   if (!CGF.HaveInsertPoint())
2655     return;
2656   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
// The ident flag must match the one used at init time; parts of the flag
// selection expression sit on lines stripped from this view.
2657   llvm::Value *Args[] = {
2658       emitUpdateLocation(CGF, Loc,
2660                              (DKind == OMPD_target_teams_loop)
2661                              ? OMP_IDENT_WORK_DISTRIBUTE
2662                              : isOpenMPLoopDirective(DKind)
2663                                    ? OMP_IDENT_WORK_LOOP
2664                                    : OMP_IDENT_WORK_SECTIONS),
2665       getThreadID(CGF, Loc)};
// GPU offload devices pair distribute init with the distribute fini entry.
2667   if (isOpenMPDistributeDirective(DKind) &&
2668       CGM.getLangOpts().OpenMPIsTargetDevice &&
2669       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2670     CGF.EmitRuntimeCall(
2671         OMPBuilder.getOrCreateRuntimeFunction(
2672             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2673         Args);
2674   else
2675     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2676                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2677                         Args);
2678 }
2679
// Emits the per-iteration finalization call for a dynamically dispatched
// ordered loop via the size/signedness-specific dispatch fini entry point.
// (NOTE: the function-name line of the signature is outside this view.)
2681                                                  SourceLocation Loc,
2682                                                  unsigned IVSize,
2683                                                  bool IVSigned) {
2684   if (!CGF.HaveInsertPoint())
2685     return;
2686   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2687   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2688   CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2689                       Args);
2690 }
2691
// Emits the call fetching the next chunk of a dynamically scheduled loop and
// converts the kmp_int32 result of __kmpc_dispatch_next to a boolean
// "more work available" value. (NOTE: the function-name line of the signature
// is outside this extracted view.)
2693                                           SourceLocation Loc, unsigned IVSize,
2694                                           bool IVSigned, Address IL,
2695                                           Address LB, Address UB,
2696                                           Address ST) {
2697   // Call __kmpc_dispatch_next(
2698   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2699   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2700   //          kmp_int[32|64] *p_stride);
2701   llvm::Value *Args[] = {
2702       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2703       IL.emitRawPointer(CGF), // &isLastIter
2704       LB.emitRawPointer(CGF), // &Lower
2705       UB.emitRawPointer(CGF), // &Upper
2706       ST.emitRawPointer(CGF)  // &Stride
2707   };
2708   llvm::Value *Call = CGF.EmitRuntimeCall(
2709       OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
// The runtime returns a signed 32-bit int; narrow it to the AST bool type.
2710   return CGF.EmitScalarConversion(
2711       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2712       CGF.getContext().BoolTy, Loc);
2713 }
2714
// Emits __kmpc_push_num_threads to communicate a 'num_threads' clause value
// to the runtime; the value is sign-extended/truncated to kmp_int32.
// (NOTE: the function-name line of the signature is outside this view.)
2716                                                  llvm::Value *NumThreads,
2717                                                  SourceLocation Loc) {
2718   if (!CGF.HaveInsertPoint())
2719     return;
2720   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2721   llvm::Value *Args[] = {
2722       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2723       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2724   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2725                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2726                       Args);
2727 }
2728
// Emits __kmpc_push_proc_bind for a 'proc_bind' clause; the enum value is
// materialized as a target 'int'. An unknown proc_bind kind is a caller bug.
// (NOTE: the function-name line of the signature is outside this view.)
2730                                            ProcBindKind ProcBind,
2731                                            SourceLocation Loc) {
2732   if (!CGF.HaveInsertPoint())
2733     return;
2734   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2735   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2736   llvm::Value *Args[] = {
2737       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2738       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2739   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2740                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2741                       Args);
2742 }
2743
// Emits an OpenMP 'flush': when the OpenMPIRBuilder is driving codegen it
// emits the flush itself, otherwise a direct __kmpc_flush runtime call is
// produced. (NOTE: the function-name line of the signature, and whatever uses
// the AO ordering parameter, are outside this extracted view.)
2745     SourceLocation Loc, llvm::AtomicOrdering AO) {
2746   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2747     OMPBuilder.createFlush(CGF.Builder);
2748   } else {
2749     if (!CGF.HaveInsertPoint())
2750       return;
2751     // Build call void __kmpc_flush(ident_t *loc)
2752     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2753                             CGM.getModule(), OMPRTL___kmpc_flush),
2754                         emitUpdateLocation(CGF, Loc));
2755   }
2756 }
2757
2758namespace {
2759/// Indexes of fields for type kmp_task_t.
2760/// NOTE: the enumerator order must stay in sync with the field order added in
2760/// createKmpTaskTRecordDecl below; the indexes are used with
2760/// std::next(field_begin(), <enumerator>) to address individual fields.
2760enum KmpTaskTFields {
2761  /// List of shared variables.
2762  KmpTaskTShareds,
2763  /// Task routine.
2764  KmpTaskTRoutine,
2765  /// Partition id for the untied tasks.
2766  KmpTaskTPartId,
2767  /// Function with call of destructors for private variables.
2768  Data1,
2769  /// Task priority.
2770  Data2,
2771  /// (Taskloops only) Lower bound.
2772  KmpTaskTLowerBound,
2773  /// (Taskloops only) Upper bound.
2774  KmpTaskTUpperBound,
2775  /// (Taskloops only) Stride.
2776  KmpTaskTStride,
2777  /// (Taskloops only) Is last iteration flag.
2778  KmpTaskTLastIter,
2779  /// (Taskloops only) Reduction data.
2780  KmpTaskTReductions,
2781};
2782} // anonymous namespace
2783
// Emits the offload entries/info metadata through the OpenMPIRBuilder,
// installing a diagnostics callback that maps builder error kinds back onto
// clang diagnostics. (NOTE: the function-name line is outside this view.)
2785   // If we are in simd mode or there are no entries, we don't need to do
2786   // anything.
2787   if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2788     return;
2789 
// The callback tries to recover a SourceLocation for the diagnostic by
// matching the entry's device/file unique IDs against the SourceManager's
// known files (the location-construction line is stripped from this view).
2790   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2791       [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2792              const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2793     SourceLocation Loc;
2794     if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2795       for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2797            I != E; ++I) {
2798         if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2799             I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2801               I->getFirst(), EntryInfo.Line, 1);
2802           break;
2803         }
2804       }
2805     }
// Translate each builder error kind into a custom clang error diagnostic.
2806     switch (Kind) {
2807     case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2808       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2809           DiagnosticsEngine::Error, "Offloading entry for target region in "
2810                                     "%0 is incorrect: either the "
2811                                     "address or the ID is invalid.");
2812       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2813     } break;
2814     case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2815       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2816           DiagnosticsEngine::Error, "Offloading entry for declare target "
2817                                     "variable %0 is incorrect: the "
2818                                     "address is invalid.");
2819       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2820     } break;
2821     case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
// Link errors carry no usable location, so this diagnostic is reported
// without one.
2822       unsigned DiagID = CGM.getDiags().getCustomDiagID(
2824           "Offloading entry for declare target variable is incorrect: the "
2825           "address is invalid.");
2826       CGM.getDiags().Report(DiagID);
2827     } break;
2828     }
2829   };
2830 
2831   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2832 }
2833
// Lazily builds the routine-entry function pointer type used for task entry
// thunks; subsequent calls are no-ops once KmpRoutineEntryPtrTy is cached.
// (NOTE: the function-name line and some interior lines are outside this view.)
2835   if (!KmpRoutineEntryPtrTy) {
2836     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2838     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2840     KmpRoutineEntryPtrQTy = C.getPointerType(
2841         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2843   }
2844 }
2845
2846namespace {
2847struct PrivateHelpersTy {
2848 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2849 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2850 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2851 PrivateElemInit(PrivateElemInit) {}
2852 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2853 const Expr *OriginalRef = nullptr;
2854 const VarDecl *Original = nullptr;
2855 const VarDecl *PrivateCopy = nullptr;
2856 const VarDecl *PrivateElemInit = nullptr;
2857 bool isLocalPrivate() const {
2858 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2859 }
2860};
2861typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2862} // anonymous namespace
2863
2864static bool isAllocatableDecl(const VarDecl *VD) {
2865 const VarDecl *CVD = VD->getCanonicalDecl();
2866 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2867 return false;
2868 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2869 // Use the default allocation.
2870 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2871 !AA->getAllocator());
2872}
2873
// Builds the implicit '.kmp_privates.t' record holding one field per
// privatized variable; returns nullptr when there are no privates. (NOTE: the
// parameter-list line of the signature and the field-creation lines are
// outside this extracted view.)
2874static RecordDecl *
2876   if (!Privates.empty()) {
2877     ASTContext &C = CGM.getContext();
2878     // Build struct .kmp_privates_t. {
2879     //         /*  private vars  */
2880     //       };
2881     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2882     RD->startDefinition();
2883     for (const auto &Pair : Privates) {
2884       const VarDecl *VD = Pair.second.Original;
2886       // If the private variable is a local variable with lvalue ref type,
2887       // allocate the pointer instead of the pointee type.
2888       if (Pair.second.isLocalPrivate()) {
2889         if (VD->getType()->isLValueReferenceType())
2890           Type = C.getPointerType(Type);
2891         if (isAllocatableDecl(VD))
2892           Type = C.getPointerType(Type);
2893       }
// Propagate any alignment attributes from the original variable onto the
// generated field so the privatized copy keeps the required alignment.
2895       if (VD->hasAttrs()) {
2896         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2897              E(VD->getAttrs().end());
2898              I != E; ++I)
2899           FD->addAttr(*I);
2900       }
2901     }
2902     RD->completeDefinition();
2903     return RD;
2904   }
2905   return nullptr;
2906 }
2907
// Builds the implicit 'kmp_task_t' record (and its nested 'kmp_cmplrdata_t'
// union). Taskloop directives get five extra fields (lb/ub/st/liter/
// reductions). Field order must match the KmpTaskTFields enum above. (NOTE:
// the first parameter-list line of the signature is outside this view.)
2908static RecordDecl *
2910                          QualType KmpInt32Ty,
2911                          QualType KmpRoutineEntryPointerQTy) {
2912   ASTContext &C = CGM.getContext();
2913   // Build struct kmp_task_t {
2914   //         void *              shareds;
2915   //         kmp_routine_entry_t routine;
2916   //         kmp_int32           part_id;
2917   //         kmp_cmplrdata_t data1;
2918   //         kmp_cmplrdata_t data2;
2919   // For taskloops additional fields:
2920   //         kmp_uint64          lb;
2921   //         kmp_uint64          ub;
2922   //         kmp_int64           st;
2923   //         kmp_int32           liter;
2924   //         void *              reductions;
2925   //       };
// The cmplrdata union overlays the destructor thunk with the task priority.
2926   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2927   UD->startDefinition();
2928   addFieldToRecordDecl(C, UD, KmpInt32Ty);
2929   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2930   UD->completeDefinition();
2931   QualType KmpCmplrdataTy = C.getRecordType(UD);
2932   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2933   RD->startDefinition();
2934   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2935   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2936   addFieldToRecordDecl(C, RD, KmpInt32Ty);
2937   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2938   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2939   if (isOpenMPTaskLoopDirective(Kind)) {
2940     QualType KmpUInt64Ty =
2941         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2942     QualType KmpInt64Ty =
2943         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2944     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2945     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2946     addFieldToRecordDecl(C, RD, KmpInt64Ty);
2947     addFieldToRecordDecl(C, RD, KmpInt32Ty);
2948     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2949   }
2950   RD->completeDefinition();
2951   return RD;
2952 }
2953
// Builds the implicit 'kmp_task_t_with_privates' wrapper: the kmp_task_t data
// followed, when there are any privatized variables, by the privates record.
// (NOTE: the first parameter-list line of the signature is outside this view.)
2954static RecordDecl *
2956                                      ArrayRef<PrivateDataTy> Privates) {
2957   ASTContext &C = CGM.getContext();
2958   // Build struct kmp_task_t_with_privates {
2959   //         kmp_task_t task_data;
2960   //         .kmp_privates_t. privates;
2961   //       };
2962   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2963   RD->startDefinition();
2964   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
// createPrivatesRecordDecl returns nullptr for an empty privates list, in
// which case the wrapper has only the task_data field.
2965   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2966     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
2967   RD->completeDefinition();
2968   return RD;
2969 }
2970
2971/// Emit a proxy function which accepts kmp_task_t as the second
2972/// argument.
2973/// \code
2974/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2975///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2976///   For taskloops:
2977///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2978///   tt->reductions, tt->shareds);
2979///   return 0;
2980/// }
2981/// \endcode
// NOTE: the first signature line and the parameter-kind arguments of the two
// ImplicitParamDecls are on lines outside this extracted view.
2982static llvm::Function *
2984                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2985                       QualType KmpTaskTWithPrivatesPtrQTy,
2986                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2987                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
2988                       llvm::Value *TaskPrivatesMap) {
2989   ASTContext &C = CGM.getContext();
// Build the (gtid, task*) argument list for the thunk.
2990   FunctionArgList Args;
2991   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
2993   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2994                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
2996   Args.push_back(&GtidArg);
2997   Args.push_back(&TaskTypeArg);
2998   const auto &TaskEntryFnInfo =
2999       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3000   llvm::FunctionType *TaskEntryTy =
3001       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
// The thunk gets internal linkage; the runtime calls it via kmp_task_t.
3002   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3003   auto *TaskEntry = llvm::Function::Create(
3004       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3005   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3006   TaskEntry->setDoesNotRecurse();
3007   CodeGenFunction CGF(CGM);
3008   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3009                     Loc, Loc);
3010 
3011   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3012   // tt,
3013   // For taskloops:
3014   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3015   // tt->task_data.shareds);
// Load gtid and the typed task pointer from the thunk's own parameters.
3016   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3017       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3018   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3019       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3020       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3021   const auto *KmpTaskTWithPrivatesQTyRD =
3022       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3023   LValue Base =
3024       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3025   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
// Field indexes below come from the KmpTaskTFields enum.
3026   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3027   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3028   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3029 
3030   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3031   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3032   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3033       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3034       CGF.ConvertTypeForMem(SharedsPtrTy));
3035 
// The privates field (index 1 of the wrapper) may be absent when the task
// has no privatized variables; pass a null pointer in that case.
3036   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3037   llvm::Value *PrivatesParam;
3038   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3039     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3040     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3041         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3042   } else {
3043     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3044   }
3045 
3046   llvm::Value *CommonArgs[] = {
3047       GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3048       CGF.Builder
3050                                       CGF.VoidPtrTy, CGF.Int8Ty)
3051           .emitRawPointer(CGF)};
3052   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3053                                           std::end(CommonArgs));
// Taskloops additionally forward lb/ub/st/liter/reductions loaded from the
// task descriptor.
3054   if (isOpenMPTaskLoopDirective(Kind)) {
3055     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3056     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3057     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3058     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3059     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3060     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3061     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3062     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3063     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3064     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3065     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3066     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3067     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3068     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3069     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3070     CallArgs.push_back(LBParam);
3071     CallArgs.push_back(UBParam);
3072     CallArgs.push_back(StParam);
3073     CallArgs.push_back(LIParam);
3074     CallArgs.push_back(RParam);
3075   }
3076   CallArgs.push_back(SharedsParam);
3077 
3078   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3079                                                   CallArgs);
// The runtime contract expects the entry thunk to return 0.
3080   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3081                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3082   CGF.FinishFunction();
3083   return TaskEntry;
3084 }
3085
// Emits the internal '.omp_task_destructor.' thunk: it walks the fields of
// the privates record inside kmp_task_t_with_privates and pushes a destroy
// cleanup for each field whose type needs destruction. (NOTE: the
// function-name line of the signature is outside this extracted view.)
3087                                     SourceLocation Loc,
3088                                     QualType KmpInt32Ty,
3089                                     QualType KmpTaskTWithPrivatesPtrQTy,
3090                                     QualType KmpTaskTWithPrivatesQTy) {
3091   ASTContext &C = CGM.getContext();
// Same (gtid, task*) signature shape as the task entry thunk.
3092   FunctionArgList Args;
3093   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3095   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3096                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3098   Args.push_back(&GtidArg);
3099   Args.push_back(&TaskTypeArg);
3100   const auto &DestructorFnInfo =
3101       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3102   llvm::FunctionType *DestructorFnTy =
3103       CGM.getTypes().GetFunctionType(DestructorFnInfo);
3104   std::string Name =
3105       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3106   auto *DestructorFn =
3107       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3108                              Name, &CGM.getModule());
3109   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3110                                     DestructorFnInfo);
3111   DestructorFn->setDoesNotRecurse();
3112   CodeGenFunction CGF(CGM);
3113   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3114                     Args, Loc, Loc);
3115 
3117       CGF.GetAddrOfLocalVar(&TaskTypeArg),
3118       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3119   const auto *KmpTaskTWithPrivatesQTyRD =
3120       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
// Skip the kmp_task_t field; the second field is the privates record.
3121   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3122   Base = CGF.EmitLValueForField(Base, *FI);
3123   for (const auto *Field :
3124        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3125     if (QualType::DestructionKind DtorKind =
3126             Field->getType().isDestructedType()) {
3127       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3128       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3129     }
3130   }
3131   CGF.FinishFunction();
3132   return DestructorFn;
3133 }
3134
3135/// Emit a privates mapping function for correct handling of private and
3136/// firstprivate variables.
3137/// \code
3138/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3139/// **noalias priv1,...,  <tyn> **noalias privn) {
3140///   *priv1 = &.privates.priv1;
3141///   ...;
3142///   *privn = &.privates.privn;
3143/// }
3144/// \endcode
// NOTE: the first signature line and the parameter-kind arguments of the
// ImplicitParamDecl::Create calls are on lines outside this extracted view.
3145static llvm::Value *
3147                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
3148                               ArrayRef<PrivateDataTy> Privates) {
3149   ASTContext &C = CGM.getContext();
3150   FunctionArgList Args;
// First parameter: const restrict pointer to the privates record.
3151   ImplicitParamDecl TaskPrivatesArg(
3152       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3153       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3155   Args.push_back(&TaskPrivatesArg);
// One out-parameter (pointer-to-pointer) is appended per private,
// firstprivate, lastprivate, and local-private variable; PrivateVarsPos
// records each variable's position in the argument list.
3156   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3157   unsigned Counter = 1;
3158   for (const Expr *E : Data.PrivateVars) {
3159     Args.push_back(ImplicitParamDecl::Create(
3160         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3161         C.getPointerType(C.getPointerType(E->getType()))
3162             .withConst()
3163             .withRestrict(),
3165     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3166     PrivateVarsPos[VD] = Counter;
3167     ++Counter;
3168   }
3169   for (const Expr *E : Data.FirstprivateVars) {
3170     Args.push_back(ImplicitParamDecl::Create(
3171         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3172         C.getPointerType(C.getPointerType(E->getType()))
3173             .withConst()
3174             .withRestrict(),
3176     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3177     PrivateVarsPos[VD] = Counter;
3178     ++Counter;
3179   }
3180   for (const Expr *E : Data.LastprivateVars) {
3181     Args.push_back(ImplicitParamDecl::Create(
3182         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3183         C.getPointerType(C.getPointerType(E->getType()))
3184             .withConst()
3185             .withRestrict(),
3187     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3188     PrivateVarsPos[VD] = Counter;
3189     ++Counter;
3190   }
// Local privates with lvalue-ref or allocatable types are stored as
// pointers, mirroring the field layout in createPrivatesRecordDecl.
3191   for (const VarDecl *VD : Data.PrivateLocals) {
3193     if (VD->getType()->isLValueReferenceType())
3194       Ty = C.getPointerType(Ty);
3195     if (isAllocatableDecl(VD))
3196       Ty = C.getPointerType(Ty);
3197     Args.push_back(ImplicitParamDecl::Create(
3198         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3199         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3201     PrivateVarsPos[VD] = Counter;
3202     ++Counter;
3203   }
3204   const auto &TaskPrivatesMapFnInfo =
3205       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3206   llvm::FunctionType *TaskPrivatesMapTy =
3207       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3208   std::string Name =
3209       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3210   auto *TaskPrivatesMap = llvm::Function::Create(
3211       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3212       &CGM.getModule());
3213   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3214                                     TaskPrivatesMapFnInfo);
// In optimized builds force-inline the mapper: it is trivial glue code.
3215   if (CGM.getLangOpts().Optimize) {
3216     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3217     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3218     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3219   }
3220   CodeGenFunction CGF(CGM);
3221   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3222                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
3223 
3224   // *privi = &.privates.privi;
// Walk the privates record in field order and store each field's address
// through the matching out-parameter (located via PrivateVarsPos).
3226       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3227       TaskPrivatesArg.getType()->castAs<PointerType>());
3228   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3229   Counter = 0;
3230   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3231     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3232     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3233     LValue RefLVal =
3234         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3235     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3236         RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3237     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3238     ++Counter;
3239   }
3240   CGF.FinishFunction();
3241   return TaskPrivatesMap;
3242 }
3243
3244/// Emit initialization for private variables in task-based directives.
// Walks the privates record field-by-field in lockstep with the Privates
// list and emits the initializer for each private copy. ForDup indicates
// emission inside the task-dup function (taskloop firstprivates copied from
// the source task). (NOTE: the function-name line and several interior lines
// are outside this extracted view.)
3246                              const OMPExecutableDirective &D,
3247                              Address KmpTaskSharedsPtr, LValue TDBase,
3248                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3249                              QualType SharedsTy, QualType SharedsPtrTy,
3250                              const OMPTaskDataTy &Data,
3251                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3252   ASTContext &C = CGF.getContext();
// The second field of kmp_task_t_with_privates is the privates record.
3253   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3254   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3256                                  ? OMPD_taskloop
3257                                  : OMPD_task;
3258   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3259   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3260   LValue SrcBase;
3261   bool IsTargetTask =
3264   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3265   // PointersArray, SizesArray, and MappersArray. The original variables for
3266   // these arrays are not captured and we get their addresses explicitly.
3267   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3268       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3269     SrcBase = CGF.MakeAddrLValue(
3271             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3272             CGF.ConvertTypeForMem(SharedsTy)),
3273         SharedsTy);
3274   }
3275   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3276   for (const PrivateDataTy &Pair : Privates) {
3277     // Do not initialize private locals.
3278     if (Pair.second.isLocalPrivate()) {
3279       ++FI;
3280       continue;
3281     }
3282     const VarDecl *VD = Pair.second.PrivateCopy;
3283     const Expr *Init = VD->getAnyInitializer();
// In the dup path only non-trivial constructor initializations are re-run;
// trivially initialized data was already copied.
3284     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3285                              !CGF.isTrivialInitializer(Init)))) {
3286       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3287       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3288         const VarDecl *OriginalVD = Pair.second.Original;
3289         // Check if the variable is the target-based BasePointersArray,
3290         // PointersArray, SizesArray, or MappersArray.
3291         LValue SharedRefLValue;
3292         QualType Type = PrivateLValue.getType();
3293         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3294         if (IsTargetTask && !SharedField) {
3295           assert(isa<ImplicitParamDecl>(OriginalVD) &&
3296                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3297                  cast<CapturedDecl>(OriginalVD->getDeclContext())
3298                          ->getNumParams() == 0 &&
3299                  isa<TranslationUnitDecl>(
3300                      cast<CapturedDecl>(OriginalVD->getDeclContext())
3301                          ->getDeclContext()) &&
3302                  "Expected artificial target data variable.");
3303           SharedRefLValue =
3304               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3305         } else if (ForDup) {
// Dup path: read the shared value out of the source task's shareds block,
// realigned to the original declaration's alignment.
3306           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3307           SharedRefLValue = CGF.MakeAddrLValue(
3308               SharedRefLValue.getAddress(CGF).withAlignment(
3309                   C.getDeclAlign(OriginalVD)),
3310               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3311               SharedRefLValue.getTBAAInfo());
3312         } else if (CGF.LambdaCaptureFields.count(
3313                        Pair.second.Original->getCanonicalDecl()) > 0 ||
3314                    isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3315           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3316         } else {
3317           // Processing for implicitly captured variables.
3318           InlinedOpenMPRegionRAII Region(
3319               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3320               /*HasCancel=*/false, /*NoInheritance=*/true);
3321           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3322         }
3323         if (Type->isArrayType()) {
3324           // Initialize firstprivate array.
3325           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3326             // Perform simple memcpy.
3327             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3328           } else {
3329             // Initialize firstprivate array using element-by-element
3330             // initialization.
3332                 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3333                 Type,
3334                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3335                                                   Address SrcElement) {
3336                   // Clean up any temporaries needed by the initialization.
3337                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
3338                   InitScope.addPrivate(Elem, SrcElement);
3339                   (void)InitScope.Privatize();
3340                   // Emit initialization for single element.
3341                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3342                       CGF, &CapturesInfo);
3343                   CGF.EmitAnyExprToMem(Init, DestElement,
3344                                        Init->getType().getQualifiers(),
3345                                        /*IsInitializer=*/false);
3346                 });
3347           }
3348         } else {
// Scalar/class case: privatize the element-init variable to the shared
// address, then run the initializer into the private field.
3349           CodeGenFunction::OMPPrivateScope InitScope(CGF);
3350           InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3351           (void)InitScope.Privatize();
3352           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3353           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3354                              /*capturedByInit=*/false);
3355         }
3356       } else {
3357         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3358       }
3359     }
3360     ++FI;
3361   }
3362 }
3363
3364/// Check if duplication function is required for taskloops.
// Returns true when any non-local private copy is initialized by a
// non-trivial C++ constructor, i.e. the task-dup function must re-run
// initialization. (NOTE: the function-name line and part of the trivial-init
// check are outside this extracted view.)
3366                                 ArrayRef<PrivateDataTy> Privates) {
3367   bool InitRequired = false;
3368   for (const PrivateDataTy &Pair : Privates) {
// Local privates never require duplication-time initialization.
3369     if (Pair.second.isLocalPrivate())
3370       continue;
3371     const VarDecl *VD = Pair.second.PrivateCopy;
3372     const Expr *Init = VD->getAnyInitializer();
3373     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3375     if (InitRequired)
3376       break;
3377   }
3378   return InitRequired;
3379 }
3380
3381 
3382/// Emit task_dup function (for initialization of
3383/// private/firstprivate/lastprivate vars and last_iter flag)
3384/// \code
3385/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3386/// lastpriv) {
3387/// // setup lastprivate flag
3388///    task_dst->last = lastpriv;
3389/// // could be constructor calls here...
3390/// }
3391/// \endcode
// NOTE: the first signature line and the parameter-kind arguments of the
// ImplicitParamDecls are on lines outside this extracted view.
3392static llvm::Value *
3394                     const OMPExecutableDirective &D,
3395                     QualType KmpTaskTWithPrivatesPtrQTy,
3396                     const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3397                     const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3398                     QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3399                     ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3400   ASTContext &C = CGM.getContext();
// Arguments: destination task, source task, and the lastprivate flag.
3401   FunctionArgList Args;
3402   ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3403                            KmpTaskTWithPrivatesPtrQTy,
3405   ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3406                            KmpTaskTWithPrivatesPtrQTy,
3408   ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3410   Args.push_back(&DstArg);
3411   Args.push_back(&SrcArg);
3412   Args.push_back(&LastprivArg);
3413   const auto &TaskDupFnInfo =
3414       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3415   llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3416   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3417   auto *TaskDup = llvm::Function::Create(
3418       TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3419   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3420   TaskDup->setDoesNotRecurse();
3421   CodeGenFunction CGF(CGM);
3422   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3423                     Loc);
3424 
3425   LValue TDBase = CGF.EmitLoadOfPointerLValue(
3426       CGF.GetAddrOfLocalVar(&DstArg),
3427       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3428   // task_dst->liter = lastpriv;
3429   if (WithLastIter) {
3430     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3432         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3433     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3434     llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3435         CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3436     CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3437   }
3438 
3439   // Emit initial values for private copies (if any).
3440   assert(!Privates.empty());
3441   Address KmpTaskSharedsPtr = Address::invalid();
// Firstprivates are copied out of the SOURCE task's shareds block.
3442   if (!Data.FirstprivateVars.empty()) {
3443     LValue TDBase = CGF.EmitLoadOfPointerLValue(
3444         CGF.GetAddrOfLocalVar(&SrcArg),
3445         KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3447         TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3448     KmpTaskSharedsPtr = Address(
3450                                  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3451                                                   KmpTaskTShareds)),
3452                              Loc),
3453         CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3454   }
// Delegate the actual per-private initialization to emitPrivatesInit in
// ForDup mode.
3455   emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3456                    SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3457   CGF.FinishFunction();
3458   return TaskDup;
3459 }
3460
3461/// Checks if destructor function is required to be generated.
3462/// \return true if cleanups are required, false otherwise.
3463static bool
3464checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3465 ArrayRef<PrivateDataTy> Privates) {
3466 for (const PrivateDataTy &P : Privates) {
3467 if (P.second.isLocalPrivate())
3468 continue;
3469 QualType Ty = P.second.Original->getType().getNonReferenceType();
3470 if (Ty.isDestructedType())
3471 return true;
3472 }
3473 return false;
3474}
3475
namespace {
/// Loop generator for OpenMP iterator expression.
/// Emits the loop nest for an 'iterator(...)' modifier: the constructor
/// privatizes the iterator/counter variables and opens one counter loop per
/// iterator; the destructor emits the matching increments, back-branches and
/// exit blocks, innermost loop first.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // NOTE(review): extraction dropped the member declarations here (upstream
  // has SmallVectors Uppers, ContDests and ExitDests holding the per-iterator
  // upper bounds and continue/exit JumpDests).
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  // Privatizes all iterator and counter variables and emits the loop headers.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // NOTE(review): one line was lost to extraction here.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      // NOTE(review): the store call head (upstream: CGF.EmitStoreOfScalar()
      // was dropped by extraction here.
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signed vs. unsigned compare is chosen by the counter's type; the
      // condition expression itself was lost to extraction.
      llvm::Value *Cmp =
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  // Closes the loop nest opened by the constructor: counter increments,
  // branches back to the continue blocks and the exit blocks, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
3551
// Returns the address of an expression's storage together with its size in
// bytes. Three forms are handled: array-shaping expressions (size = pointee
// size times all dimensions), array sections (size = one-past-upper-bound
// address minus base address), and plain lvalues (size = sizeof(type)).
static std::pair<llvm::Value *, llvm::Value *>
// NOTE(review): extraction dropped the declarator line (upstream:
// getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {).
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size is the pointee size multiplied by every shaping dimension,
    // converted to size_t first.
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Section size is (&upper + 1) - &lower, computed as integers.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
3588
3589/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3590static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3591 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3592 if (KmpTaskAffinityInfoTy.isNull()) {
3593 RecordDecl *KmpAffinityInfoRD =
3594 C.buildImplicitRecord("kmp_task_affinity_info_t");
3595 KmpAffinityInfoRD->startDefinition();
3596 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3597 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3598 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3599 KmpAffinityInfoRD->completeDefinition();
3600 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3601 }
3602}
3603
// CGOpenMPRuntime::emitTaskInit (function head lost in extraction): builds the
// kmp_task_t object for a task-based directive — aggregates privates sorted by
// alignment, creates the record types and helper functions (task entry,
// privates map, task_dup, destructors), allocates the task through the
// runtime, and fills shareds/privates/destructors/priority into the record.
//
// NOTE(review): this excerpt lost a number of interior lines to extraction
// (the declarator and several statement/condition heads); the code below is
// kept verbatim and must be reconciled with upstream clang before use.
    const OMPExecutableDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy,
    Address Shareds, const OMPTaskDataTy &Data) {
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Allocatable locals live behind a pointer, hence pointer alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Most-aligned privates first; stable to keep declaration order among
  // equally aligned entries.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
        CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
    Flags = Flags | DetachableFlag;
  // 'final' may be a runtime value: select FinalFlag or 0 at runtime.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
    // Process list of affinity data.
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // An iterator modifier multiplies the element count by the (runtime)
        // trip count of each iterator.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time-sized: a constant array temp suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-expanded entries continue from a runtime counter that starts
      // where the constant entries ended.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            TDBase,
            *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
        DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
3992
/// Translates internal dependency kind into the runtime kind.
// NOTE(review): extraction dropped the declarator line (upstream:
// static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
// {) and the OMPC_DEPEND_unknown case label before the llvm_unreachable.
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  // The remaining kinds never reach codegen as a plain dependence flag.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
4023
4024/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4025static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4026 QualType &FlagsTy) {
4027 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4028 if (KmpDependInfoTy.isNull()) {
4029 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4030 KmpDependInfoRD->startDefinition();
4031 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4032 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4033 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4034 KmpDependInfoRD->completeDefinition();
4035 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4036 }
4037}
4038
// Reads back a depobj: returns the number of dependencies it stores (kept in
// the base_addr field of the bookkeeping element at index -1) together with
// an lvalue for the first kmp_depend_info element.
std::pair<llvm::Value *, LValue>
// NOTE(review): extraction dropped the declarator line (upstream:
// CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,)
// and two statement heads below.
                             SourceLocation Loc) {
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
      DepobjLVal.getAddress(CGF).withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one element: the record *before* the array holds bookkeeping.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4065
// Fills one kmp_depend_info element per dependency expression in Data,
// writing base_addr/len/flags. Pos is either a direct index (constant case)
// or an lvalue counter (when iterator modifiers expand at runtime).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
// NOTE(review): extraction dropped the parameter line (upstream:
// const OMPTaskDataTy::DependData &Data,) and one statement head below.
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If an iterator modifier is present, wrap the whole fill in its loop nest.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    // NOTE(review): the store call head (upstream: CGF.EmitStoreOfScalar()
    // was dropped by extraction here.
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    // Advance the output position: bump either the direct index or the
    // runtime counter.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4136
// Computes, for each depobj dependency expression in Data, how many
// kmp_depend_info elements it contributes; returns one size value per
// expression.
// NOTE(review): extraction dropped the function head (upstream:
// static SmallVector<llvm::Value *, 4> emitDepobjElementsSizes() and the
// declaration of the local 'Sizes' vector.
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      // Accumulate the element count into a zero-initialized temp; loads are
      // deferred until after the iterator scope closes.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the accumulated counters back outside the iterator loop nest.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4174
// Copies the kmp_depend_info elements stored in each depobj in Data into
// DependenciesArray, advancing the PosLVal counter by the number of elements
// copied from each depobj.
// NOTE(review): extraction dropped the function head (upstream:
// static void emitDepobjElements(CodeGenFunction &CGF,) and one parameter
// line below.
                               QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4211
4212std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4214 SourceLocation Loc) {
4215 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4216 return D.DepExprs.empty();
4217 }))
4218 return std::make_pair(nullptr, Address::invalid());
4219 // Process list of dependencies.
4221 Address DependenciesArray = Address::invalid();
4222 llvm::Value *NumOfElements = nullptr;
4223 unsigned NumDependencies = std::accumulate(
4224 Dependencies.begin(), Dependencies.end(), 0,
4225 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4226 return D.DepKind == OMPC_DEPEND_depobj
4227 ? V
4228 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4229 });
4230 QualType FlagsTy;
4231 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4232 bool HasDepobjDeps = false;
4233 bool HasRegularWithIterators = false;
4234 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4235 llvm::Value *NumOfRegularWithIterators =
4236 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4237 // Calculate number of depobj dependencies and regular deps with the
4238 // iterators.
4239 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4240 if (D.DepKind == OMPC_DEPEND_depobj) {
4243 for (llvm::Value *Size : Sizes) {
4244 NumOfDepobjElements =
4245 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4246 }
4247 HasDepobjDeps = true;
4248 continue;
4249 }
4250 // Include number of iterations, if any.
4251
4252 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4253 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4254 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4255 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4256 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4257 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4258 NumOfRegularWithIterators =
4259 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4260 }
4261 HasRegularWithIterators = true;
4262 continue;
4263 }
4264 }
4265
4266 QualType KmpDependInfoArrayTy;
4267 if (HasDepobjDeps || HasRegularWithIterators) {
4268 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4269 /*isSigned=*/false);
4270 if (HasDepobjDeps) {
4271 NumOfElements =
4272 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4273 }
4274 if (HasRegularWithIterators) {
4275 NumOfElements =
4276 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4277 }
4278 auto *OVE = new (C) OpaqueValueExpr(
4279 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4280 VK_PRValue);
4281 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4282 RValue::get(NumOfElements));
4283 KmpDependInfoArrayTy =
4284 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4285 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4286 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4287 // Properly emit variable-sized array.
4288 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4290 CGF.EmitVarDecl(*PD);
4291 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4292 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4293 /*isSigned=*/false);
4294 } else {
4295 KmpDependInfoArrayTy = C.getConstantArrayType(
4296 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4297 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4298 DependenciesArray =
4299 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4300 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4301 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4302 /*isSigned=*/false);
4303 }
4304 unsigned Pos = 0;
4305 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4306 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4307 Dependencies[I].IteratorExpr)
4308 continue;
4309 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4310 DependenciesArray);
4311 }
4312 // Copy regular dependencies with iterators.
4313 LValue PosLVal = CGF.MakeAddrLValue(
4314 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4315 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4316 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4317 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4318 !Dependencies[I].IteratorExpr)
4319 continue;
4320 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4321 DependenciesArray);
4322 }
4323 // Copy final depobj arrays without iterators.
4324 if (HasDepobjDeps) {
4325 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4326 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4327 continue;
4328 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4329 DependenciesArray);
4330 }
4331 }
4332 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4333 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4334 return std::make_pair(NumOfElements, DependenciesArray);
4335}
4336
4338 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4339 SourceLocation Loc) {
4340 if (Dependencies.DepExprs.empty())
4341 return Address::invalid();
4342 // Process list of dependencies.
4344 Address DependenciesArray = Address::invalid();
4345 unsigned NumDependencies = Dependencies.DepExprs.size();
4346 QualType FlagsTy;
4347 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4348 RecordDecl *KmpDependInfoRD =
4349 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4350
4351 llvm::Value *Size;
4352 // Define type kmp_depend_info[<Dependencies.size()>];
4353 // For depobj reserve one extra element to store the number of elements.
4354 // It is required to handle depobj(x) update(in) construct.
4355 // kmp_depend_info[<Dependencies.size()>] deps;
4356 llvm::Value *NumDepsVal;
4357 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4358 if (const auto *IE =
4359 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4360 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4361 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4362 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4363 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4364 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4365 }
4366 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4367 NumDepsVal);
4368 CharUnits SizeInBytes =
4369 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4370 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4371 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4372 NumDepsVal =
4373 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4374 } else {
4375 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4376 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4377 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4378 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4379 Size = CGM.getSize(Sz.alignTo(Align));
4380 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4381 }
4382 // Need to allocate on the dynamic memory.
4383 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4384 // Use default allocator.
4385 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4386 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4387
4388 llvm::Value *Addr =
4389 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4390 CGM.getModule(), OMPRTL___kmpc_alloc),
4391 Args, ".dep.arr.addr");
4392 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4394 Addr, KmpDependInfoLlvmTy->getPointerTo());
4395 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4396 // Write number of elements in the first element of array for depobj.
4397 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4398 // deps[i].base_addr = NumDependencies;
4399 LValue BaseAddrLVal = CGF.EmitLValueForField(
4400 Base,
4401 *std::next(KmpDependInfoRD->field_begin(),
4402 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4403 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4404 llvm::PointerUnion<unsigned *, LValue *> Pos;
4405 unsigned Idx = 1;
4406 LValue PosLVal;
4407 if (Dependencies.IteratorExpr) {
4408 PosLVal = CGF.MakeAddrLValue(
4409 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4410 C.getSizeType());
4411 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4412 /*IsInit=*/true);
4413 Pos = &PosLVal;
4414 } else {
4415 Pos = &Idx;
4416 }
4417 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4418 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4419 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4420 CGF.Int8Ty);
4421 return DependenciesArray;
4422}
4423
4425 SourceLocation Loc) {
4427 QualType FlagsTy;
4428 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4430 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4431 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4433 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4435 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4436 Addr.getElementType(), Addr.emitRawPointer(CGF),
4437 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4438 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4439 CGF.VoidPtrTy);
4440 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4441 // Use default allocator.
4442 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4443 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4444
4445 // _kmpc_free(gtid, addr, nullptr);
4446 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4447 CGM.getModule(), OMPRTL___kmpc_free),
4448 Args);
4449}
4450
4452 OpenMPDependClauseKind NewDepKind,
4453 SourceLocation Loc) {
4455 QualType FlagsTy;
4456 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4457 RecordDecl *KmpDependInfoRD =
4458 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4459 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4460 llvm::Value *NumDeps;
4461 LValue Base;
4462 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4463
4464 Address Begin = Base.getAddress(CGF);
4465 // Cast from pointer to array type to pointer to single element.
4466 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4467 Begin.emitRawPointer(CGF), NumDeps);
4468 // The basic structure here is a while-do loop.
4469 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4470 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4471 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4472 CGF.EmitBlock(BodyBB);
4473 llvm::PHINode *ElementPHI =
4474 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4475 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4476 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4477 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4478 Base.getTBAAInfo());
4479 // deps[i].flags = NewDepKind;
4480 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4481 LValue FlagsLVal = CGF.EmitLValueForField(
4482 Base, *std::next(KmpDependInfoRD->field_begin(),
4483 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4485 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4486 FlagsLVal);
4487
4488 // Shift the address forward by one element.
4489 llvm::Value *ElementNext =
4490 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4491 .emitRawPointer(CGF);
4492 ElementPHI->addIncoming(ElementNext, CGF.