clang 19.0.0git
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "ABIInfoImpl.h"
15#include "CGCXXABI.h"
16#include "CGCleanup.h"
17#include "CGRecordLayout.h"
18#include "CodeGenFunction.h"
19#include "TargetInfo.h"
20#include "clang/AST/APValue.h"
21#include "clang/AST/Attr.h"
22#include "clang/AST/Decl.h"
31#include "llvm/ADT/ArrayRef.h"
32#include "llvm/ADT/SetOperations.h"
33#include "llvm/ADT/SmallBitVector.h"
34#include "llvm/ADT/SmallVector.h"
35#include "llvm/ADT/StringExtras.h"
36#include "llvm/Bitcode/BitcodeReader.h"
37#include "llvm/IR/Constants.h"
38#include "llvm/IR/DerivedTypes.h"
39#include "llvm/IR/GlobalValue.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Value.h"
42#include "llvm/Support/AtomicOrdering.h"
43#include "llvm/Support/Format.h"
44#include "llvm/Support/raw_ostream.h"
45#include <cassert>
46#include <cstdint>
47#include <numeric>
48#include <optional>
49
50using namespace clang;
51using namespace CodeGen;
52using namespace llvm::omp;
53
54namespace {
55/// Base class for handling code generation inside OpenMP regions.
56class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
57public:
58 /// Kinds of OpenMP regions used in codegen.
59 enum CGOpenMPRegionKind {
60 /// Region with outlined function for standalone 'parallel'
61 /// directive.
62 ParallelOutlinedRegion,
63 /// Region with outlined function for standalone 'task' directive.
64 TaskOutlinedRegion,
65 /// Region for constructs that do not require function outlining,
66 /// like 'for', 'sections', 'atomic' etc. directives.
67 InlinedRegion,
68 /// Region with outlined function for standalone 'target' directive.
69 TargetRegion,
70 };
71
72 CGOpenMPRegionInfo(const CapturedStmt &CS,
73 const CGOpenMPRegionKind RegionKind,
74 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
75 bool HasCancel)
76 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
77 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
78
79 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
80 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
81 bool HasCancel)
82 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
83 Kind(Kind), HasCancel(HasCancel) {}
84
85 /// Get a variable or parameter for storing global thread id
86 /// inside OpenMP construct.
87 virtual const VarDecl *getThreadIDVariable() const = 0;
88
89 /// Emit the captured statement body.
90 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
91
92 /// Get an LValue for the current ThreadID variable.
93 /// \return LValue for thread id variable. This LValue always has type int32*.
94 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
95
96 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
97
98 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
99
100 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
101
102 bool hasCancel() const { return HasCancel; }
103
104 static bool classof(const CGCapturedStmtInfo *Info) {
105 return Info->getKind() == CR_OpenMP;
106 }
107
108 ~CGOpenMPRegionInfo() override = default;
109
110protected:
111 CGOpenMPRegionKind RegionKind;
112 RegionCodeGenTy CodeGen;
114 bool HasCancel;
115};
116
117/// API for captured statement code generation in OpenMP constructs.
118class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
119public:
120 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
121 const RegionCodeGenTy &CodeGen,
122 OpenMPDirectiveKind Kind, bool HasCancel,
123 StringRef HelperName)
124 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
125 HasCancel),
126 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
127 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
128 }
129
130 /// Get a variable or parameter for storing global thread id
131 /// inside OpenMP construct.
132 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
133
134 /// Get the name of the capture helper.
135 StringRef getHelperName() const override { return HelperName; }
136
137 static bool classof(const CGCapturedStmtInfo *Info) {
138 return CGOpenMPRegionInfo::classof(Info) &&
139 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
140 ParallelOutlinedRegion;
141 }
142
143private:
144 /// A variable or parameter storing global thread id for OpenMP
145 /// constructs.
146 const VarDecl *ThreadIDVar;
147 StringRef HelperName;
148};
149
/// API for captured statement code generation in OpenMP constructs
/// (outlined 'task' regions); also carries the switch-based resume
/// machinery for untied tasks.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// PrePostActionTy that emits the task-switching point for untied tasks:
  /// a switch over the part-id so a re-invoked task body can resume where it
  /// previously yielded.
  class UntiedTaskActionTy final : public PrePostActionTy {
    // True when the task is *untied* (note the ctor inverts the Tied flag).
    bool Untied;
    // Parameter holding a pointer to the current part id of the task.
    const VarDecl *PartIDVar;
    // Codegen sequence run at each untied switch point.
    const RegionCodeGenTy UntiedCodeGen;
    // Switch over the part id, built lazily in Enter(); each yield point
    // adds one case.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        // NOTE(review): one statement is missing here in this copy of the
        // file (original line 174) -- verify against upstream before
        // relying on this text.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Case 0: first invocation enters the task body from the top.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Persist the next part id so a later re-invocation of the task
        // dispatches straight to the block added below.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        // NOTE(review): one statement is missing here in this copy of the
        // file (original line 191) -- verify against upstream before
        // relying on this text.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of switch cases added so far (one per task part).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forward untied-switch emission to the stored action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
238
/// API for inlined captured statement code generation in OpenMP
/// constructs. Every query is delegated to the enclosing region info
/// (when present) instead of keeping separate captured state.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  /// NOTE(review): unlike the other delegating methods, this consults
  /// getOldCSI() directly instead of the OuterRegionInfo member, so it also
  /// covers the case where the enclosing info is not an OpenMP region --
  /// presumably intentional; confirm against upstream before changing.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to an OpenMP region info, or null when the enclosing
  /// captured-statement info is not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
321
322/// API for captured statement code generation in OpenMP target
323/// constructs. For this captures, implicit parameters are used instead of the
324/// captured fields. The name of the target region has to be unique in a given
325/// application so it is provided by the client, because only the client has
326/// the information to generate that.
327class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
328public:
329 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
330 const RegionCodeGenTy &CodeGen, StringRef HelperName)
331 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
332 /*HasCancel=*/false),
333 HelperName(HelperName) {}
334
335 /// This is unused for target regions because each starts executing
336 /// with a single thread.
337 const VarDecl *getThreadIDVariable() const override { return nullptr; }
338
339 /// Get the name of the capture helper.
340 StringRef getHelperName() const override { return HelperName; }
341
342 static bool classof(const CGCapturedStmtInfo *Info) {
343 return CGOpenMPRegionInfo::classof(Info) &&
344 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
345 }
346
347private:
348 StringRef HelperName;
349};
350
/// Placeholder RegionCodeGenTy callback used where a region body must never
/// be emitted (passed by CGOpenMPInnerExprInfo below, which only captures
/// expressions).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already private; only globals need the
      // privatization below.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      // NOTE(review): one argument line is missing here in
                      // this copy of the file (original line 376) -- verify
                      // against upstream.
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
412
/// RAII for emitting code of OpenMP constructs.
/// On construction installs a CGOpenMPInlinedRegionInfo as the
/// CodeGenFunction's CapturedStmtInfo; on destruction deletes it and
/// restores the previous one. With NoInheritance it also stashes and clears
/// the lambda/block capture state so the inlined region does not see it.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved CGF.LambdaCaptureFields while the region is active (swapped).
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  // Saved CGF.LambdaThisCaptureField while the region is active.
  FieldDecl *LambdaThisCaptureField = nullptr;
  // Saved CGF.BlockInfo while the region is active.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  // Whether capture state was stashed (and therefore must be restored).
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
455
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (deliberately the same value as
  /// OMP_IDENT_BARRIER_IMPL, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
484
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32      reserved_1;   /**<  might be used in Fortran;
///                                       see above  */
///    kmp_int32      flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                       KMP_IDENT_KMPC identifies this union
///                                       member  */
///    kmp_int32      reserved_2;   /**<  not really used in Fortran any more;
///                                       see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32      reserved_3;   /**< source[4] in Fortran, do not use for
///                                      C++  */
///    char const *psource;         /**< String describing the source location.
///                                 The string is composed of semi-colon separated
///                                 fields which describe the source file,
///                                 the function and a pair of line numbers that
///                                 delimit the construct.
///                                  */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
525
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  /// Each ordered value below is its unordered counterpart plus 32.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
557
558/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
559/// region.
560class CleanupTy final : public EHScopeStack::Cleanup {
561 PrePostActionTy *Action;
562
563public:
564 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
565 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
566 if (!CGF.HaveInsertPoint())
567 return;
568 Action->Exit(CGF);
569 }
570};
571
572} // anonymous namespace
573
576 if (PrePostAction) {
577 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
578 Callback(CodeGen, CGF, *PrePostAction);
579 } else {
580 PrePostActionTy Action;
581 Callback(CodeGen, CGF, Action);
582 }
583}
584
585/// Check if the combiner is a call to UDR combiner and if it is so return the
586/// UDR decl used for reduction.
587static const OMPDeclareReductionDecl *
588getReductionInit(const Expr *ReductionOp) {
589 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
590 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
591 if (const auto *DRE =
592 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
593 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
594 return DRD;
595 return nullptr;
596}
597
599 const OMPDeclareReductionDecl *DRD,
600 const Expr *InitOp,
601 Address Private, Address Original,
602 QualType Ty) {
603 if (DRD->getInitializer()) {
604 std::pair<llvm::Function *, llvm::Function *> Reduction =
606 const auto *CE = cast<CallExpr>(InitOp);
607 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
608 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
609 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
610 const auto *LHSDRE =
611 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
612 const auto *RHSDRE =
613 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
614 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
615 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
616 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
617 (void)PrivateScope.Privatize();
619 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
620 CGF.EmitIgnoredExpr(InitOp);
621 } else {
622 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
623 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
624 auto *GV = new llvm::GlobalVariable(
625 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
626 llvm::GlobalValue::PrivateLinkage, Init, Name);
627 LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
628 RValue InitRVal;
629 switch (CGF.getEvaluationKind(Ty)) {
630 case TEK_Scalar:
631 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
632 break;
633 case TEK_Complex:
634 InitRVal =
636 break;
637 case TEK_Aggregate: {
638 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
639 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
640 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
641 /*IsInitializer=*/false);
642 return;
643 }
644 }
645 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
646 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
647 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
648 /*IsInitializer=*/false);
649 }
650}
651
652/// Emit initialization of arrays of complex types.
653/// \param DestAddr Address of the array.
654/// \param Type Type of array.
655/// \param Init Initial expression of array.
656/// \param SrcAddr Address of the original array.
658 QualType Type, bool EmitDeclareReductionInit,
659 const Expr *Init,
660 const OMPDeclareReductionDecl *DRD,
661 Address SrcAddr = Address::invalid()) {
662 // Perform element-by-element initialization.
663 QualType ElementTy;
664
665 // Drill down to the base element type on both arrays.
666 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
667 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
668 if (DRD)
669 SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());
670
671 llvm::Value *SrcBegin = nullptr;
672 if (DRD)
673 SrcBegin = SrcAddr.emitRawPointer(CGF);
674 llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
675 // Cast from pointer to array type to pointer to single element.
676 llvm::Value *DestEnd =
677 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
678 // The basic structure here is a while-do loop.
679 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
680 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
681 llvm::Value *IsEmpty =
682 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
683 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
684
685 // Enter the loop body, making that address the current address.
686 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
687 CGF.EmitBlock(BodyBB);
688
689 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
690
691 llvm::PHINode *SrcElementPHI = nullptr;
692 Address SrcElementCurrent = Address::invalid();
693 if (DRD) {
694 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
695 "omp.arraycpy.srcElementPast");
696 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
697 SrcElementCurrent =
698 Address(SrcElementPHI, SrcAddr.getElementType(),
699 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
700 }
701 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
702 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
703 DestElementPHI->addIncoming(DestBegin, EntryBB);
704 Address DestElementCurrent =
705 Address(DestElementPHI, DestAddr.getElementType(),
706 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
707
708 // Emit copy.
709 {
710 CodeGenFunction::RunCleanupsScope InitScope(CGF);
711 if (EmitDeclareReductionInit) {
712 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
713 SrcElementCurrent, ElementTy);
714 } else
715 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
716 /*IsInitializer=*/false);
717 }
718
719 if (DRD) {
720 // Shift the address forward by one element.
721 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
722 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
723 "omp.arraycpy.dest.element");
724 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
725 }
726
727 // Shift the address forward by one element.
728 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
729 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
730 "omp.arraycpy.dest.element");
731 // Check whether we've reached the end.
732 llvm::Value *Done =
733 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
734 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
735 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
736
737 // Done.
738 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
739}
740
/// Emit an lvalue for expression \p E; thin delegating wrapper over
/// CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
744
745LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
746 const Expr *E) {
747 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
748 return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
749 return LValue();
750}
751
752void ReductionCodeGen::emitAggregateInitialization(
753 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
754 const OMPDeclareReductionDecl *DRD) {
755 // Emit VarDecl with copy init for arrays.
756 // Get the address of the original variable captured in current
757 // captured region.
758 const auto *PrivateVD =
759 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
760 bool EmitDeclareReductionInit =
761 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
762 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
763 EmitDeclareReductionInit,
764 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
765 : PrivateVD->getInit(),
766 DRD, SharedAddr);
767}
768
771 ArrayRef<const Expr *> Privates,
772 ArrayRef<const Expr *> ReductionOps) {
773 ClausesData.reserve(Shareds.size());
774 SharedAddresses.reserve(Shareds.size());
775 Sizes.reserve(Shareds.size());
776 BaseDecls.reserve(Shareds.size());
777 const auto *IOrig = Origs.begin();
778 const auto *IPriv = Privates.begin();
779 const auto *IRed = ReductionOps.begin();
780 for (const Expr *Ref : Shareds) {
781 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
782 std::advance(IOrig, 1);
783 std::advance(IPriv, 1);
784 std::advance(IRed, 1);
785 }
786}
787
789 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
790 "Number of generated lvalues must be exactly N.");
791 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
792 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
793 SharedAddresses.emplace_back(First, Second);
794 if (ClausesData[N].Shared == ClausesData[N].Ref) {
795 OrigAddresses.emplace_back(First, Second);
796 } else {
797 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
798 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
799 OrigAddresses.emplace_back(First, Second);
800 }
801}
802
804 QualType PrivateType = getPrivateType(N);
805 bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
806 if (!PrivateType->isVariablyModifiedType()) {
807 Sizes.emplace_back(
808 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
809 nullptr);
810 return;
811 }
812 llvm::Value *Size;
813 llvm::Value *SizeInChars;
814 auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
815 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
816 if (AsArraySection) {
817 Size = CGF.Builder.CreatePtrDiff(ElemType,
818 OrigAddresses[N].second.getPointer(CGF),
819 OrigAddresses[N].first.getPointer(CGF));
820 Size = CGF.Builder.CreateNUWAdd(
821 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
822 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
823 } else {
824 SizeInChars =
825 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
826 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
827 }
828 Sizes.emplace_back(SizeInChars, Size);
830 CGF,
831 cast<OpaqueValueExpr>(
832 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
833 RValue::get(Size));
834 CGF.EmitVariablyModifiedType(PrivateType);
835}
836
838 llvm::Value *Size) {
839 QualType PrivateType = getPrivateType(N);
840 if (!PrivateType->isVariablyModifiedType()) {
841 assert(!Size && !Sizes[N].second &&
842 "Size should be nullptr for non-variably modified reduction "
843 "items.");
844 return;
845 }
847 CGF,
848 cast<OpaqueValueExpr>(
849 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
850 RValue::get(Size));
851 CGF.EmitVariablyModifiedType(PrivateType);
852}
853
// ReductionCodeGen::emitInitialization: emit the initializer for the N-th
// private reduction copy, choosing between aggregate init, a user-declared
// reduction (declare reduction) initializer, or the variable's own init.
// NOTE(review): doxygen scrape — the leading signature line (orig. 854) is
// missing from this view.
855 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
856 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
857 assert(SharedAddresses.size() > N && "No variable was generated");
858 const auto *PrivateVD =
859 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
860 const OMPDeclareReductionDecl *DRD =
861 getReductionInit(ClausesData[N].ReductionOp);
// Array-typed items go through element-wise aggregate initialization.
862 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
863 if (DRD && DRD->getInitializer())
864 (void)DefaultInit(CGF);
865 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
// Scalar items with a declare-reduction initializer use that initializer.
866 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
867 (void)DefaultInit(CGF);
868 QualType SharedType = SharedAddresses[N].first.getType();
869 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
870 PrivateAddr, SharedAddr, SharedType);
// Otherwise emit the private VarDecl's own (non-trivial) initializer.
871 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
872 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
873 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
874 PrivateVD->getType().getQualifiers(),
875 /*IsInitializer=*/false);
876 }
877}
878
// ReductionCodeGen::needCleanups: true when the N-th private reduction item
// has a non-trivial destructor and therefore needs a cleanup emitted.
// NOTE(review): doxygen scrape — the signature line (orig. 879) is missing.
880 QualType PrivateType = getPrivateType(N);
881 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
882 return DTorKind != QualType::DK_none;
883}
884
// ReductionCodeGen::emitCleanups: push a destructor cleanup for the N-th
// private reduction copy when its type requires destruction.
// NOTE(review): doxygen scrape — the leading signature line (orig. 885) is
// missing from this view.
886 Address PrivateAddr) {
887 QualType PrivateType = getPrivateType(N);
888 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
889 if (needCleanups(N)) {
// Re-type the address to the memory representation before registering the
// destroy cleanup.
890 PrivateAddr =
891 PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
892 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
893 }
894}
895
// loadToBegin: chase pointer/reference indirections starting from BaseLV
// until the loaded type matches ElTy, returning an lvalue retyped to ElTy.
// NOTE(review): doxygen scrape — the leading signature line (orig. 896) is
// missing from this view.
897 LValue BaseLV) {
898 BaseTy = BaseTy.getNonReferenceType();
899 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
900 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
// Each iteration loads one level of indirection (pointer or reference).
901 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
902 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
903 } else {
904 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
905 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
906 }
907 BaseTy = BaseTy->getPointeeType();
908 }
// Produce the final lvalue with the element type and TBAA info for ElTy.
909 return CGF.MakeAddrLValue(
910 BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
911 BaseLV.getType(), BaseLV.getBaseInfo(),
912 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
913}
914
// castToBase: rebuild a chain of temporaries mirroring the indirections of
// BaseTy so that Addr can be presented through the original base's shape.
// NOTE(review): doxygen scrape — the signature (orig. 914-915), the `Tmp`
// declaration (orig. 917) and two pointer-cast statements (orig. 933, 939)
// are missing from this view.
916 Address OriginalBaseAddress, llvm::Value *Addr) {
918 Address TopTmp = Address::invalid();
919 Address MostTopTmp = Address::invalid();
920 BaseTy = BaseTy.getNonReferenceType();
921 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
922 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
// One temporary per indirection level; link each into its parent.
923 Tmp = CGF.CreateMemTemp(BaseTy);
924 if (TopTmp.isValid())
925 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
926 else
927 MostTopTmp = Tmp;
928 TopTmp = Tmp;
929 BaseTy = BaseTy->getPointeeType();
930 }
931
932 if (Tmp.isValid()) {
// Store the adjusted pointer in the innermost temporary and hand back the
// outermost one.
934 Addr, Tmp.getElementType());
935 CGF.Builder.CreateStore(Addr, Tmp);
936 return MostTopTmp;
937 }
938
// No indirection needed: reuse the original base address with the new
// pointer value.
940 Addr, OriginalBaseAddress.getType());
941 return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
942}
943
944static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
945 const VarDecl *OrigVD = nullptr;
946 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
947 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
948 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
949 Base = TempOASE->getBase()->IgnoreParenImpCasts();
950 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
951 Base = TempASE->getBase()->IgnoreParenImpCasts();
952 DE = cast<DeclRefExpr>(Base);
953 OrigVD = cast<VarDecl>(DE->getDecl());
954 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
955 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
956 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
957 Base = TempASE->getBase()->IgnoreParenImpCasts();
958 DE = cast<DeclRefExpr>(Base);
959 OrigVD = cast<VarDecl>(DE->getDecl());
960 }
961 return OrigVD;
962}
963
// ReductionCodeGen: adjust the private copy's address so it can be accessed
// through the same base expression (array section/subscript) as the shared
// original; records the base VarDecl either way.
// NOTE(review): doxygen scrape — the signature (orig. 964) and a pointer
// bitcast statement (orig. 978) are missing from this view.
965 Address PrivateAddr) {
966 const DeclRefExpr *DE;
967 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
968 BaseDecls.emplace_back(OrigVD);
969 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
970 LValue BaseLValue =
971 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
972 OriginalBaseLValue);
973 Address SharedAddr = SharedAddresses[N].first.getAddress();
// Offset (in elements) of the reduction item within the shared base.
974 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
975 SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
976 SharedAddr.emitRawPointer(CGF));
977 llvm::Value *PrivatePointer =
979 PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
// Apply the same offset to the private copy.
980 llvm::Value *Ptr = CGF.Builder.CreateGEP(
981 SharedAddr.getElementType(), PrivatePointer, Adjustment);
982 return castToBase(CGF, OrigVD->getType(),
983 SharedAddresses[N].first.getType(),
984 OriginalBaseLValue.getAddress(), Ptr);
985 }
// Simple DeclRefExpr item: no adjustment needed.
986 BaseDecls.emplace_back(
987 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
988 return PrivateAddr;
989}
990
// ReductionCodeGen: true when the N-th item's reduction op resolves to a
// user-defined 'declare reduction' with an explicit initializer.
// NOTE(review): doxygen scrape — the signature line (orig. 991) is missing.
992 const OMPDeclareReductionDecl *DRD =
993 getReductionInit(ClausesData[N].ReductionOp);
994 return DRD && DRD->getInitializer();
995}
996
997LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
998 return CGF.EmitLoadOfPointerLValue(
999 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1000 getThreadIDVariable()->getType()->castAs<PointerType>());
1001}
1002
// Emit the structured block of an OpenMP region inside a terminate scope so
// that exceptions cannot escape the region (per OpenMP 1.2.2 semantics).
// NOTE(review): doxygen scrape — one statement between `if (S)` and
// `CodeGen(CGF);` (orig. 1013) is missing from this view.
1003void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1004 if (!CGF.HaveInsertPoint())
1005 return;
1006 // 1.2.2 OpenMP Language Terminology
1007 // Structured block - An executable statement with a single entry at the
1008 // top and a single exit at the bottom.
1009 // The point of exit cannot be a branch out of the structured block.
1010 // longjmp() and throw() must not violate the entry/exit criteria.
1011 CGF.EHStack.pushTerminate();
1012 if (S)
1014 CodeGen(CGF);
1015 CGF.EHStack.popTerminate();
1016}
1017
// For task regions the thread id is stored by value (kmp_int32), so the
// lvalue is built directly over the local variable's address.
// NOTE(review): doxygen scrape — the final MakeAddrLValue argument line
// (orig. 1022) is missing from this view.
1018LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1019 CodeGenFunction &CGF) {
1020 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1021 getThreadIDVariable()->getType(),
1023}
1024
// Helper: append a public, non-mutable field of the given type to a record
// being built for OpenMP bookkeeping structures (e.g. task/privates records).
// NOTE(review): doxygen scrape — the leading signature line (orig. 1025) is
// missing from this view.
1026 QualType FieldTy) {
1027 auto *Field = FieldDecl::Create(
1028 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1029 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1030 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1031 Field->setAccess(AS_public);
1032 DC->addDecl(Field);
1033 return Field;
1034}
1035
// CGOpenMPRuntime constructor: set up the OpenMPIRBuilder and its config
// from the language options, and load any host offload metadata.
// NOTE(review): doxygen scrape — the signature line (orig. 1036) and parts
// of the loadOffloadInfoMetadata call (orig. 1046) are missing.
1037 : CGM(CGM), OMPBuilder(CGM.getModule()) {
// kmp_critical_name is an array of 8 i32, matching the KMP runtime ABI.
1038 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1039 llvm::OpenMPIRBuilderConfig Config(
1040 CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
1041 CGM.getLangOpts().OpenMPOffloadMandatory,
1042 /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
1043 hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
1044 OMPBuilder.initialize();
1045 OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
1047 : StringRef{});
1048 OMPBuilder.setConfig(Config);
1049
1050 // The user forces the compiler to behave as if omp requires
1051 // unified_shared_memory was given.
1052 if (CGM.getLangOpts().OpenMPForceUSM) {
1054 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
1055 }
1056}
1057
// CGOpenMPRuntime::clear: drop cached internal variables and erase unused
// non-target global declarations that were only kept for debug info.
// NOTE(review): doxygen scrape — the signature line (orig. 1058) is missing.
1059 InternalVars.clear();
1060 // Clean non-target variable declarations possibly used only in debug info.
1061 for (const auto &Data : EmittedNonTargetVariables) {
1062 if (!Data.getValue().pointsToAliveValue())
1063 continue;
1064 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1065 if (!GV)
1066 continue;
// Only erase pure declarations with no remaining uses.
1067 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1068 continue;
1069 GV->eraseFromParent();
1070 }
1071}
1072
// CGOpenMPRuntime::getName: join the given parts into a platform-specific
// runtime symbol name (separator chosen by the OpenMPIRBuilder).
// NOTE(review): doxygen scrape — the signature line (orig. 1073) is missing.
1074 return OMPBuilder.createPlatformSpecificName(Parts);
1075}
1076
// Emit an .omp_combiner./.omp_initializer. helper for a user-defined
// reduction: void fn(Ty *in/omp_orig, Ty *out/omp_priv), mapping the named
// 'omp_in'/'omp_out' (or 'omp_orig'/'omp_priv') variables onto the
// dereferenced parameters before emitting the combiner/initializer expr.
// NOTE(review): doxygen scrape — the second signature line (orig. 1078) is
// missing from this view.
1077static llvm::Function *
1079 const Expr *CombinerInitializer, const VarDecl *In,
1080 const VarDecl *Out, bool IsCombiner) {
1081 // void .omp_combiner.(Ty *in, Ty *out);
1082 ASTContext &C = CGM.getContext();
1083 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1084 FunctionArgList Args;
1085 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1086 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1087 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1088 /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
1089 Args.push_back(&OmpOutParm);
1090 Args.push_back(&OmpInParm);
1091 const CGFunctionInfo &FnInfo =
1092 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1093 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1094 std::string Name = CGM.getOpenMPRuntime().getName(
1095 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1096 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1097 Name, &CGM.getModule());
1098 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// With optimization on, force-inline these tiny helpers.
1099 if (CGM.getLangOpts().Optimize) {
1100 Fn->removeFnAttr(llvm::Attribute::NoInline);
1101 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1102 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1103 }
1104 CodeGenFunction CGF(CGM);
1105 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1106 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1107 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1108 Out->getLocation());
1109 CodeGenFunction::OMPPrivateScope Scope(CGF);
1110 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1111 Scope.addPrivate(
1112 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1113 .getAddress());
1114 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1115 Scope.addPrivate(
1116 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1117 .getAddress());
1118 (void)Scope.Privatize();
// For initializers, run omp_priv's own non-trivial initializer first.
1119 if (!IsCombiner && Out->hasInit() &&
1120 !CGF.isTrivialInitializer(Out->getInit())) {
1121 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1122 Out->getType().getQualifiers(),
1123 /*IsInitializer=*/true);
1124 }
1125 if (CombinerInitializer)
1126 CGF.EmitIgnoredExpr(CombinerInitializer);
1127 Scope.ForceCleanup();
1128 CGF.FinishFunction();
1129 return Fn;
1130}
1131
// CGOpenMPRuntime::emitUserDefinedReduction: lazily emit (and cache in
// UDRMap) the combiner and optional initializer functions for a
// 'declare reduction' declaration; also track them per-function so they can
// be evicted in functionFinished().
// NOTE(review): doxygen scrape — the signature (orig. 1132-1133) and the
// `Initializer = emitCombinerOrInitializer(` line (orig. 1143) are missing.
1134 if (UDRMap.count(D) > 0)
1135 return;
1136 llvm::Function *Combiner = emitCombinerOrInitializer(
1137 CGM, D->getType(), D->getCombiner(),
1138 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1139 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1140 /*IsCombiner=*/true);
1141 llvm::Function *Initializer = nullptr;
1142 if (const Expr *Init = D->getInitializer()) {
// Only call-style initializers pass the Init expression through.
1144 CGM, D->getType(),
1145 D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
1146 : nullptr,
1147 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1148 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1149 /*IsCombiner=*/false);
1150 }
1151 UDRMap.try_emplace(D, Combiner, Initializer);
1152 if (CGF) {
1153 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1154 Decls.second.push_back(D);
1155 }
1156}
1157
// Return the cached (combiner, initializer) pair for a declare-reduction
// decl, emitting it on first request.
// NOTE(review): doxygen scrape — the second signature line (orig. 1159) is
// missing from this view.
1158std::pair<llvm::Function *, llvm::Function *>
1160 auto I = UDRMap.find(D);
1161 if (I != UDRMap.end())
1162 return I->second;
1163 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1164 return UDRMap.lookup(D);
1165}
1166
// RAII that pushes a finalization callback onto the OpenMPIRBuilder's stack
// for the lifetime of a parallel-like region, so cancellation points branch
// through clang's cleanup machinery; pops it on destruction.
// NOTE(review): doxygen scrape — the JumpDest declaration line (orig. 1194)
// inside FiniCB is missing from this view.
1167namespace {
1168// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1169// Builder if one is present.
1170struct PushAndPopStackRAII {
1171 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1172 bool HasCancel, llvm::omp::Directive Kind)
1173 : OMPBuilder(OMPBuilder) {
1174 if (!OMPBuilder)
1175 return;
1176
1177 // The following callback is the crucial part of clangs cleanup process.
1178 //
1179 // NOTE:
1180 // Once the OpenMPIRBuilder is used to create parallel regions (and
1181 // similar), the cancellation destination (Dest below) is determined via
1182 // IP. That means if we have variables to finalize we split the block at IP,
1183 // use the new block (=BB) as destination to build a JumpDest (via
1184 // getJumpDestInCurrentScope(BB)) which then is fed to
1185 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1186 // to push & pop an FinalizationInfo object.
1187 // The FiniCB will still be needed but at the point where the
1188 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1189 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1190 assert(IP.getBlock()->end() == IP.getPoint() &&
1191 "Clang CG should cause non-terminated block!");
1192 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1193 CGF.Builder.restoreIP(IP);
1195 CGF.getOMPCancelDestination(OMPD_parallel);
1196 CGF.EmitBranchThroughCleanup(Dest);
1197 };
1198
1199 // TODO: Remove this once we emit parallel regions through the
1200 // OpenMPIRBuilder as it can do this setup internally.
1201 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1202 OMPBuilder->pushFinalizationCB(std::move(FI));
1203 }
1204 ~PushAndPopStackRAII() {
1205 if (OMPBuilder)
1206 OMPBuilder->popFinalizationCB();
1207 }
1208 llvm::OpenMPIRBuilder *OMPBuilder;
1209};
1210} // namespace
1211
// Shared worker for emitting the outlined function of a parallel/teams
// region: determines cancellation support for the directive, registers the
// region with the OpenMPIRBuilder, and generates the captured-stmt function.
// NOTE(review): doxygen scrape — the leading signature line (orig. 1212) and
// the final `return CGF.GenerateCapturedStmtFunction(*CS);` (orig. 1246)
// are missing from this view.
1213 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1214 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1215 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1216 assert(ThreadIDVar->getType()->isPointerType() &&
1217 "thread id variable must be of type kmp_int32 *");
1218 CodeGenFunction CGF(CGM, true);
// Cancellation is only supported on specific parallel-flavored directives.
1219 bool HasCancel = false;
1220 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1221 HasCancel = OPD->hasCancel();
1222 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1223 HasCancel = OPD->hasCancel();
1224 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1225 HasCancel = OPSD->hasCancel();
1226 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1227 HasCancel = OPFD->hasCancel();
1228 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1229 HasCancel = OPFD->hasCancel();
1230 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1231 HasCancel = OPFD->hasCancel();
1232 else if (const auto *OPFD =
1233 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1234 HasCancel = OPFD->hasCancel();
1235 else if (const auto *OPFD =
1236 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1237 HasCancel = OPFD->hasCancel();
1238
1239 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1240 // parallel region to make cancellation barriers work properly.
1241 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1242 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1243 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1244 HasCancel, OutlinedHelperName);
1245 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1247}
1248
1249std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1250 std::string Suffix = getName({"omp_outlined"});
1251 return (Name + Suffix).str();
1252}
1253
// Overload: derive the helper name from the current function being emitted.
// NOTE(review): doxygen scrape — the signature line (orig. 1254) is missing.
1255 return getOutlinedHelperName(CGF.CurFn->getName());
1256}
1257
1258std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1259 std::string Suffix = getName({"omp", "reduction", "reduction_func"});
1260 return (Name + Suffix).str();
1261}
1262
// Emit the outlined function for a 'parallel' region by delegating to the
// shared parallel/teams worker with the OMPD_parallel captured statement.
// NOTE(review): doxygen scrape — the signature (orig. 1263-1264) and the
// `return emitParallelOrTeamsOutlinedFunction(` line (orig. 1268) are
// missing from this view.
1265 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1266 const RegionCodeGenTy &CodeGen) {
1267 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1269 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1270 CodeGen);
1271}
1272
// Emit the outlined function for a 'teams' region by delegating to the
// shared parallel/teams worker with the OMPD_teams captured statement.
// NOTE(review): doxygen scrape — the signature (orig. 1273-1274) and the
// `return emitParallelOrTeamsOutlinedFunction(` line (orig. 1278) are
// missing from this view.
1275 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1276 const RegionCodeGenTy &CodeGen) {
1277 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1279 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
1280 CodeGen);
1281}
1282
// Emit the outlined function for a task/taskloop region. For untied tasks,
// installs an action that re-enqueues the task via __kmpc_omp_task and
// reports the number of task parts back through NumberOfParts.
// NOTE(review): doxygen scrape — the leading signature line (orig. 1283) is
// missing from this view.
1284 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1285 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1286 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1287 bool Tied, unsigned &NumberOfParts) {
// Callback used for untied tasks: re-schedule the task data with
// __kmpc_omp_task so the next part runs later.
1288 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1289 PrePostActionTy &) {
1290 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1291 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1292 llvm::Value *TaskArgs[] = {
1293 UpLoc, ThreadID,
1294 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1295 TaskTVar->getType()->castAs<PointerType>())
1296 .getPointer(CGF)};
1297 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1298 CGM.getModule(), OMPRTL___kmpc_omp_task),
1299 TaskArgs);
1300 };
1301 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1302 UntiedCodeGen);
1303 CodeGen.setAction(Action);
// Unlike parallel regions, tasks receive the thread id by value.
1304 assert(!ThreadIDVar->getType()->isPointerType() &&
1305 "thread id variable must be of type kmp_int32 for tasks");
1306 const OpenMPDirectiveKind Region =
1307 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1308 : OMPD_task;
1309 const CapturedStmt *CS = D.getCapturedStmt(Region);
1310 bool HasCancel = false;
1311 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1312 HasCancel = TD->hasCancel();
1313 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1314 HasCancel = TD->hasCancel();
1315 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1316 HasCancel = TD->hasCancel();
1317 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1318 HasCancel = TD->hasCancel();
1319
1320 CodeGenFunction CGF(CGM, true);
1321 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1322 InnermostKind, HasCancel, Action);
1323 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1324 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1325 if (!Tied)
1326 NumberOfParts = Action.getNumberOfParts();
1327 return Res;
1328}
1329
// Install a dummy "service" instruction (a no-op bitcast of undef) that
// marks where runtime setup calls (e.g. __kmpc_global_thread_num) should be
// inserted — either at the current IP or right after the alloca point.
// NOTE(review): doxygen scrape — the leading signature line (orig. 1330) is
// missing from this view.
1331 bool AtCurrentPoint) {
1332 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1333 assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1334
1335 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1336 if (AtCurrentPoint) {
1337 Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1338 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1339 } else {
1340 Elem.second.ServiceInsertPt =
1341 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1342 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1343 }
1344}
1345
// Remove the per-function service insert point marker installed by
// setLocThreadIdInsertPt, if any.
// NOTE(review): doxygen scrape — the signature line (orig. 1346) is missing.
1347 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1348 if (Elem.second.ServiceInsertPt) {
1349 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
// Reset the cached pointer before erasing the instruction.
1350 Elem.second.ServiceInsertPt = nullptr;
1351 Ptr->eraseFromParent();
1352 }
1353}
1354
// Build the ";file;function;line;column;;" ident string used for OpenMP
// location descriptors, writing into the provided buffer.
// NOTE(review): doxygen scrape — the signature (orig. 1355-1356) and the
// PresumedLoc computation line (orig. 1360) are missing from this view.
1357 SmallString<128> &Buffer) {
1358 llvm::raw_svector_ostream OS(Buffer);
1359 // Build debug location
1361 OS << ";" << PLoc.getFilename() << ";";
1362 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1363 OS << FD->getQualifiedNameAsString();
1364 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1365 return OS.str();
1366}
1367
// Build (or reuse) the ident_t* location descriptor for the given source
// location; falls back to a default descriptor when location info is
// unavailable or not requested.
// NOTE(review): doxygen scrape — the signature (orig. 1368-1369) and the
// PresumedLoc line (orig. 1381) are missing from this view.
1370 unsigned Flags, bool EmitLoc) {
1371 uint32_t SrcLocStrSize;
1372 llvm::Constant *SrcLocStr;
1373 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1374 llvm::codegenoptions::NoDebugInfo) ||
1375 Loc.isInvalid()) {
1376 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1377 } else {
1378 std::string FunctionName;
1379 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1380 FunctionName = FD->getQualifiedNameAsString();
1382 const char *FileName = PLoc.getFilename();
1383 unsigned Line = PLoc.getLine();
1384 unsigned Column = PLoc.getColumn();
1385 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1386 Column, SrcLocStrSize);
1387 }
1388 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1389 return OMPBuilder.getOrCreateIdent(
1390 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1391}
1392
// Obtain the OpenMP thread id for the current function. Prefers, in order:
// the OpenMPIRBuilder (when enabled), a cached value, the region's thread-id
// argument, and finally an emitted __kmpc_global_thread_num call placed at
// the service insert point and cached for reuse.
// NOTE(review): doxygen scrape — the signature (orig. 1393-1394) and the
// setLocThreadIdInsertPt call (orig. 1450) are missing from this view.
1395 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1396 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1397 // the clang invariants used below might be broken.
1398 if (CGM.getLangOpts().OpenMPIRBuilder) {
1399 SmallString<128> Buffer;
1400 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1401 uint32_t SrcLocStrSize;
1402 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1403 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1404 return OMPBuilder.getOrCreateThreadID(
1405 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1406 }
1407
1408 llvm::Value *ThreadID = nullptr;
1409 // Check whether we've already cached a load of the thread id in this
1410 // function.
1411 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1412 if (I != OpenMPLocThreadIDMap.end()) {
1413 ThreadID = I->second.ThreadID;
1414 if (ThreadID != nullptr)
1415 return ThreadID;
1416 }
1417 // If exceptions are enabled, do not use parameter to avoid possible crash.
1418 if (auto *OMPRegionInfo =
1419 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1420 if (OMPRegionInfo->getThreadIDVariable()) {
1421 // Check if this an outlined function with thread id passed as argument.
1422 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1423 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
// Only reuse the argument when it dominates the current insertion point.
1424 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1425 !CGF.getLangOpts().CXXExceptions ||
1426 CGF.Builder.GetInsertBlock() == TopBlock ||
1427 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1428 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1429 TopBlock ||
1430 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1431 CGF.Builder.GetInsertBlock()) {
1432 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1433 // If value loaded in entry block, cache it and use it everywhere in
1434 // function.
1435 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1436 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1437 Elem.second.ThreadID = ThreadID;
1438 }
1439 return ThreadID;
1440 }
1441 }
1442 }
1443
1444 // This is not an outlined function region - need to call __kmpc_int32
1445 // kmpc_global_thread_num(ident_t *loc).
1446 // Generate thread id value and cache this value for use across the
1447 // function.
1448 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1449 if (!Elem.second.ServiceInsertPt)
1451 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1452 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt)
1454 llvm::CallInst *Call = CGF.Builder.CreateCall(
1455 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1456 OMPRTL___kmpc_global_thread_num),
1457 emitUpdateLocation(CGF, Loc));
1458 Call->setCallingConv(CGF.getRuntimeCC());
1459 Elem.second.ThreadID = Call;
1460 return Call;
1461}
1462
// Per-function teardown: drop cached thread-id/insert-point state and any
// user-defined reduction/mapper entries recorded for this function.
// NOTE(review): doxygen scrape — the signature (orig. 1463), the
// clearLocThreadIdInsertPt call (orig. 1466) and trailing statements
// (orig. 1480-1481) are missing from this view.
1464 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1465 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1467 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1468 }
1469 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1470 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1471 UDRMap.erase(D);
1472 FunctionUDRMap.erase(CGF.CurFn);
1473 }
1474 auto I = FunctionUDMMap.find(CGF.CurFn);
1475 if (I != FunctionUDMMap.end()) {
1476 for(const auto *D : I->second)
1477 UDMMap.erase(D);
1478 FunctionUDMMap.erase(I);
1479 }
1482}
1483
// Return the ident_t* type owned by the OpenMPIRBuilder.
// NOTE(review): doxygen scrape — the signature line (orig. 1484) is missing.
1485 return OMPBuilder.IdentPtr;
1486}
1487
// Lazily build and return the kmpc_micro function pointer type:
// void (*)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...).
// NOTE(review): doxygen scrape — the signature line (orig. 1488) is missing.
1489 if (!Kmpc_MicroTy) {
1490 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1491 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1492 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1493 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1494 }
1495 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1497
// Map a declare-target 'device_type' clause on VD to the offload-entry
// device-clause kind used by the OffloadEntriesInfoManager.
// NOTE(review): doxygen scrape — the second signature line (orig. 1499) is
// missing from this view.
1498llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1500 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1501 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1502 if (!DevTy)
1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1504
1505 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1506 case OMPDeclareTargetDeclAttr::DT_Host:
1507 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1508 break;
1509 case OMPDeclareTargetDeclAttr::DT_NoHost:
1510 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1511 break;
1512 case OMPDeclareTargetDeclAttr::DT_Any:
1513 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1514 break;
1515 default:
1516 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1517 break;
1518 }
1519}
1520
// Map a declare-target map-type (to/enter/link) on VD to the corresponding
// offload global-variable entry kind.
// NOTE(review): doxygen scrape — the second signature line (orig. 1522) is
// missing from this view.
1521llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1523 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1524 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1525 if (!MapType)
1526 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1527 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1528 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1529 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1530 break;
1531 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1532 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1533 break;
1534 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1535 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1536 break;
1537 default:
1538 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1539 break;
1540 }
1541}
1542
// Build a unique target-region entry info (file id + line) from a presumed
// source location, retrying without #line directives when the presumed file
// cannot be stat'ed.
// NOTE(review): doxygen scrape — the SourceManager declaration (orig. 1548)
// is missing from this view.
1543static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
1544 CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
1545 SourceLocation BeginLoc, llvm::StringRef ParentName = "") {
1546
1547 auto FileInfoCallBack = [&]() {
1549 PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);
1550
1551 llvm::sys::fs::UniqueID ID;
1552 if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
// Fall back to the physical location if the presumed file doesn't exist.
1553 PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
1554 }
1555
1556 return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
1557 };
1558
1559 return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
1560}
1561
// Return the address of a declare-target variable via the OpenMPIRBuilder,
// which may create reference/link globals for it; invalid address when the
// builder produced none.
// NOTE(review): doxygen scrape — the signature (orig. 1562), the linkage
// lambda body (orig. 1566) and several argument lines (orig. 1572,
// 1574-1575, 1577) are missing from this view.
1563 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1564
1565 auto LinkageForVariable = [&VD, this]() {
1567 };
1568
1569 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1570
1571 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1573 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1576 VD->isExternallyVisible(),
1578 VD->getCanonicalDecl()->getBeginLoc()),
1579 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1580 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1581 LinkageForVariable);
1582
1583 if (!addr)
1584 return ConstantAddress::invalid();
1585 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1586}
1587
// Lazily create the internal "<mangled-name>.cache." global used by
// __kmpc_threadprivate_cached; only valid when TLS is not used.
// NOTE(review): doxygen scrape — the signature (orig. 1589) and part of the
// assert (orig. 1591) are missing from this view.
1588llvm::Constant *
1590 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1592 // Lookup the entry, lazily creating it if necessary.
1593 std::string Suffix = getName({"cache", ""});
1594 return OMPBuilder.getOrCreateInternalVariable(
1595 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1596}
1597
// Return the address of VD's threadprivate copy. With TLS support the
// original address is used directly; otherwise a call to
// __kmpc_threadprivate_cached fetches the per-thread copy.
// NOTE(review): doxygen scrape — the signature (orig. 1598), the TLS
// condition (orig. 1603) and several argument lines (orig. 1608,
// 1610-1611) are missing from this view.
1599 const VarDecl *VD,
1600 Address VDAddr,
1602 if (CGM.getLangOpts().OpenMPUseTLS &&
1604 return VDAddr;
1605
1606 llvm::Type *VarTy = VDAddr.getElementType();
1607 llvm::Value *Args[] = {
1609 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1612 return Address(
1613 CGF.EmitRuntimeCall(
1614 OMPBuilder.getOrCreateRuntimeFunction(
1615 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1616 Args),
1617 CGF.Int8Ty, VDAddr.getAlignment());
1618}
1619
// Register ctor/copy-ctor/dtor for a threadprivate variable with the KMP
// runtime, first forcing runtime initialization via
// __kmpc_global_thread_num.
// NOTE(review): doxygen scrape — the leading signature line (orig. 1620) is
// missing from this view.
1621 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1622 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1623 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1624 // library.
1625 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1626 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1627 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1628 OMPLoc);
1629 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1630 // to register constructor/destructor for variable.
1631 llvm::Value *Args[] = {
1632 OMPLoc,
1633 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1634 Ctor, CopyCtor, Dtor};
1635 CGF.EmitRuntimeCall(
1636 OMPBuilder.getOrCreateRuntimeFunction(
1637 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1638 Args);
1639}
1640
// Emit (at most once per mangled name) the ctor/dtor helper functions for a
// threadprivate variable definition and register them with the runtime,
// either inline in CGF or via a standalone __omp_threadprivate_init_
// function returned to the caller. Returns nullptr when nothing is needed
// (TLS in use, no definition, or trivially constructible/destructible).
// NOTE(review): doxygen scrape — the leading signature (orig. 1641), the
// TLS condition (orig. 1645), several ImplicitParamDecl/arrange/Create
// argument lines (orig. 1661, 1664, 1669, 1674, 1681, 1686, 1693, 1696,
// 1701) are missing from this view.
1642 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1643 bool PerformInit, CodeGenFunction *CGF) {
1644 if (CGM.getLangOpts().OpenMPUseTLS &&
1646 return nullptr;
1647
1648 VD = VD->getDefinition(CGM.getContext());
1649 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1650 QualType ASTTy = VD->getType();
1651
1652 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1653 const Expr *Init = VD->getAnyInitializer();
1654 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1655 // Generate function that re-emits the declaration's initializer into the
1656 // threadprivate copy of the variable VD
1657 CodeGenFunction CtorCGF(CGM);
1658 FunctionArgList Args;
1659 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1660 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1662 Args.push_back(&Dst);
1663
1665 CGM.getContext().VoidPtrTy, Args);
1666 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1667 std::string Name = getName({"__kmpc_global_ctor_", ""});
1668 llvm::Function *Fn =
1670 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1671 Args, Loc, Loc);
1672 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1673 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1675 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1676 VDAddr.getAlignment());
1677 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1678 /*IsInitializer=*/true);
// The ctor returns the destination pointer it was handed.
1679 ArgVal = CtorCGF.EmitLoadOfScalar(
1680 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1682 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1683 CtorCGF.FinishFunction();
1684 Ctor = Fn;
1685 }
1687 // Generate function that emits destructor call for the threadprivate copy
1688 // of the variable VD
1689 CodeGenFunction DtorCGF(CGM);
1690 FunctionArgList Args;
1691 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1692 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1694 Args.push_back(&Dst);
1695
1697 CGM.getContext().VoidTy, Args);
1698 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1699 std::string Name = getName({"__kmpc_global_dtor_", ""});
1700 llvm::Function *Fn =
1702 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1703 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1704 Loc, Loc);
1705 // Create a scope with an artificial location for the body of this function.
1706 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1707 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1708 DtorCGF.GetAddrOfLocalVar(&Dst),
1709 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1710 DtorCGF.emitDestroy(
1711 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1712 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1713 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1714 DtorCGF.FinishFunction();
1715 Dtor = Fn;
1716 }
1717 // Do not emit init function if it is not required.
1718 if (!Ctor && !Dtor)
1719 return nullptr;
1720
1721 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1722 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1723 /*isVarArg=*/false)
1724 ->getPointerTo();
1725 // Copying constructor for the threadprivate variable.
1726 // Must be NULL - reserved by runtime, but currently it requires that this
1727 // parameter is always NULL. Otherwise it fires assertion.
1728 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1729 if (Ctor == nullptr) {
1730 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1731 /*isVarArg=*/false)
1732 ->getPointerTo();
1733 Ctor = llvm::Constant::getNullValue(CtorTy);
1734 }
1735 if (Dtor == nullptr) {
1736 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1737 /*isVarArg=*/false)
1738 ->getPointerTo();
1739 Dtor = llvm::Constant::getNullValue(DtorTy);
1740 }
// Without a CodeGenFunction, emit a standalone global init function the
// caller can schedule; otherwise emit the registration inline.
1741 if (!CGF) {
1742 auto *InitFunctionTy =
1743 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1744 std::string Name = getName({"__omp_threadprivate_init_", ""});
1745 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1746 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1747 CodeGenFunction InitCGF(CGM);
1748 FunctionArgList ArgList;
1749 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1750 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1751 Loc, Loc);
1752 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1753 InitCGF.FinishFunction();
1754 return InitFunction;
1755 }
1756 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1757 }
1758 return nullptr;
1759}
1760
// NOTE(review): the first line of this definition (the function name and
// first parameter) is missing from this excerpt; from the body this is the
// handler that registers an indirectly-callable 'declare target' function.
1762                                              llvm::GlobalValue *GV) {
1763  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1764      OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1765
1766  // We only need to handle active 'indirect' declare target functions.
1767  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1768    return;
1769
1770  // Get a mangled name to store the new device global in.
1771  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1773  SmallString<128> Name;
1774  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1775
1776  // We need to generate a new global to hold the address of the indirectly
1777  // called device function. Doing this allows us to keep the visibility and
1778  // linkage of the associated function unchanged while allowing the runtime to
1779  // access its value.
1780  llvm::GlobalValue *Addr = GV;
1781  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    // On the device, wrap the function address in a constant global so the
    // runtime can look it up; protected visibility keeps it from being
    // preempted while still being visible to the runtime.
1782    Addr = new llvm::GlobalVariable(
1784        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1785        nullptr, llvm::GlobalValue::NotThreadLocal,
1786        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1787    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1788  }
1789
    // Record the global as an "indirect" device entry so host and device
    // tables line up (WeakODR so duplicate registrations merge).
1790  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1792      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1793      llvm::GlobalValue::WeakODRLinkage);
1794}
1795
// Returns the address of a compiler-generated ("artificial") threadprivate
// variable of type \p VarType identified by \p Name.
// NOTE(review): the first signature line and a few argument lines are missing
// from this excerpt.
1797                                                             QualType VarType,
1798                                                             StringRef Name) {
1799  std::string Suffix = getName({"artificial", ""});
1800  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1801  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1802      VarLVType, Twine(Name).concat(Suffix).str());
    // Fast path: when native TLS is usable, mark the global thread_local and
    // return it directly instead of going through the runtime cache.
1803  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1805    GAddr->setThreadLocal(/*Val=*/true);
1806    return Address(GAddr, GAddr->getValueType(),
1808  }
    // Otherwise fall back to __kmpc_threadprivate_cached, which lazily
    // allocates a per-thread copy and caches it in a per-variable cache slot.
1809  std::string CacheSuffix = getName({"cache", ""});
1810  llvm::Value *Args[] = {
1814      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1815                                /*isSigned=*/false),
1816      OMPBuilder.getOrCreateInternalVariable(
1818          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1819  return Address(
1821      CGF.EmitRuntimeCall(
1822          OMPBuilder.getOrCreateRuntimeFunction(
1823              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1824          Args),
1825          VarLVType->getPointerTo(/*AddrSpace=*/0)),
1826      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1827}
1828
// Emits an if/else selection driven by an OpenMP 'if' clause condition:
// constant-folds the condition where possible, otherwise emits the usual
// omp_if.then/omp_if.else/omp_if.end CFG diamond.
1830                                   const RegionCodeGenTy &ThenGen,
1831                                   const RegionCodeGenTy &ElseGen) {
1832  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1833
1834  // If the condition constant folds and can be elided, try to avoid emitting
1835  // the condition and the dead arm of the if/else.
1836  bool CondConstant;
1837  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1838    if (CondConstant)
1839      ThenGen(CGF);
1840    else
1841      ElseGen(CGF);
1842    return;
1843  }
1844
1845  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
1846  // emit the conditional branch.
1847  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1848  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1849  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1850  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1851
1852  // Emit the 'then' code.
1853  CGF.EmitBlock(ThenBlock);
1854  ThenGen(CGF);
1855  CGF.EmitBranch(ContBlock);
1856  // Emit the 'else' code if present.
1857  // There is no need to emit line number for unconditional branch.
1859  CGF.EmitBlock(ElseBlock);
1860  ElseGen(CGF);
1861  // There is no need to emit line number for unconditional branch.
1863  CGF.EmitBranch(ContBlock);
1864  // Emit the continuation block for code after the if.
1865  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1866}
1867
// Emits code for a 'parallel' region: either a __kmpc_fork_call of the
// outlined function, or — when the 'if' clause evaluates false — a serialized
// execution between __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
1869                                       llvm::Function *OutlinedFn,
1870                                       ArrayRef<llvm::Value *> CapturedVars,
1871                                       const Expr *IfCond,
1872                                       llvm::Value *NumThreads) {
1873  if (!CGF.HaveInsertPoint())
1874    return;
1875  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1876  auto &M = CGM.getModule();
1877  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1878                    this](CodeGenFunction &CGF, PrePostActionTy &) {
1879    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1881    llvm::Value *Args[] = {
1882        RTLoc,
1883        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1884        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
1886    RealArgs.append(std::begin(Args), std::end(Args));
1887    RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1888
1889    llvm::FunctionCallee RTLFn =
1890        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1891    CGF.EmitRuntimeCall(RTLFn, RealArgs);
1892  };
1893  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1894                    this](CodeGenFunction &CGF, PrePostActionTy &) {
1896    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1897    // Build calls:
1898    // __kmpc_serialized_parallel(&Loc, GTid);
1899    llvm::Value *Args[] = {RTLoc, ThreadID};
1900    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1901                            M, OMPRTL___kmpc_serialized_parallel),
1902                        Args);
1903
1904    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1905    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1906    RawAddress ZeroAddrBound =
1908                                         /*Name=*/".bound.zero.addr");
1909    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1911    // ThreadId for serialized parallels is 0.
1912    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1913    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1914    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1915
1916    // Ensure we do not inline the function. This is trivially true for the ones
1917    // passed to __kmpc_fork_call but the ones called in serialized regions
1918    // could be inlined. This is not a perfect but it is closer to the invariant
1919    // we want, namely, every data environment starts with a new function.
1920    // TODO: We should pass the if condition to the runtime function and do the
1921    // handling there. Much cleaner code.
1922    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1923    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1924    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1925
1926    // __kmpc_end_serialized_parallel(&Loc, GTid);
1927    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1928    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1929                            M, OMPRTL___kmpc_end_serialized_parallel),
1930                        EndArgs);
1931  };
    // Static 'if' condition: emit only the required arm; otherwise always fork.
1932  if (IfCond) {
1933    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1934  } else {
1935    RegionCodeGenTy ThenRCG(ThenGen);
1936    ThenRCG(CGF);
1937  }
1938}
1939
1940// If we're inside an (outlined) parallel region, use the region info's
1941// thread-ID variable (it is passed in a first argument of the outlined function
1942// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1943// regular serial code region, get thread ID by calling kmp_int32
1944// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1945// return the address of that temp.
// NOTE(review): the signature line is missing from this excerpt; the body
// returns an Address holding the current thread ID.
1948  if (auto *OMPRegionInfo =
1949          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1950    if (OMPRegionInfo->getThreadIDVariable())
1951      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1952
    // Serial code path: materialize the thread ID into a temporary so callers
    // can take its address.
1953  llvm::Value *ThreadID = getThreadID(CGF, Loc);
1954  QualType Int32Ty =
1955      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1956  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1957  CGF.EmitStoreOfScalar(ThreadID,
1958                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1959
1960  return ThreadIDTemp;
1961}
1962
1963llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1964 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1965 std::string Name = getName({Prefix, "var"});
1966 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1967}
1968
1969namespace {
1970/// Common pre(post)-action for different OpenMP constructs.
1971class CommonActionTy final : public PrePostActionTy {
1972 llvm::FunctionCallee EnterCallee;
1973 ArrayRef<llvm::Value *> EnterArgs;
1974 llvm::FunctionCallee ExitCallee;
1975 ArrayRef<llvm::Value *> ExitArgs;
1976 bool Conditional;
1977 llvm::BasicBlock *ContBlock = nullptr;
1978
1979public:
1980 CommonActionTy(llvm::FunctionCallee EnterCallee,
1981 ArrayRef<llvm::Value *> EnterArgs,
1982 llvm::FunctionCallee ExitCallee,
1983 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1984 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1985 ExitArgs(ExitArgs), Conditional(Conditional) {}
1986 void Enter(CodeGenFunction &CGF) override {
1987 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1988 if (Conditional) {
1989 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1990 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1991 ContBlock = CGF.createBasicBlock("omp_if.end");
1992 // Generate the branch (If-stmt)
1993 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1994 CGF.EmitBlock(ThenBlock);
1995 }
1996 }
1997 void Done(CodeGenFunction &CGF) {
1998 // Emit the rest of blocks/branches
1999 CGF.EmitBranch(ContBlock);
2000 CGF.EmitBlock(ContBlock, true);
2001 }
2002 void Exit(CodeGenFunction &CGF) override {
2003 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2004 }
2005};
2006} // anonymous namespace
2007
// Emits a 'critical' region guarded by __kmpc_critical[_with_hint] /
// __kmpc_end_critical on the named region's lock.
2009                                            StringRef CriticalName,
2010                                            const RegionCodeGenTy &CriticalOpGen,
2011                                            SourceLocation Loc, const Expr *Hint) {
2012  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2013  // CriticalOpGen();
2014  // __kmpc_end_critical(ident_t *, gtid, Lock);
2015  // Prepare arguments and build a call to __kmpc_critical
2016  if (!CGF.HaveInsertPoint())
2017    return;
2018  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2019                         getCriticalRegionLock(CriticalName)};
2020  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2021                                                std::end(Args));
    // Only the enter call takes the optional hint (as an unsigned i32).
2022  if (Hint) {
2023    EnterArgs.push_back(CGF.Builder.CreateIntCast(
2024        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2025  }
2026  CommonActionTy Action(
2027      OMPBuilder.getOrCreateRuntimeFunction(
2028          CGM.getModule(),
2029          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2030      EnterArgs,
2031      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2032                                            OMPRTL___kmpc_end_critical),
2033      Args);
2034  CriticalOpGen.setAction(Action);
2035  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2036}
2037
// Emits a 'master' region conditionally executed by the master thread only:
// the body runs iff __kmpc_master returns non-zero.
2039                                          const RegionCodeGenTy &MasterOpGen,
2041  if (!CGF.HaveInsertPoint())
2042    return;
2043  // if(__kmpc_master(ident_t *, gtid)) {
2044  //   MasterOpGen();
2045  //   __kmpc_end_master(ident_t *, gtid);
2046  // }
2047  // Prepare arguments and build a call to __kmpc_master
2048  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2049  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2050                            CGM.getModule(), OMPRTL___kmpc_master),
2051                        Args,
2052                        OMPBuilder.getOrCreateRuntimeFunction(
2053                            CGM.getModule(), OMPRTL___kmpc_end_master),
2054                        Args,
2055                        /*Conditional=*/true);
2056  MasterOpGen.setAction(Action);
2057  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
    // Close the conditional region opened by the Action's Enter().
2058  Action.Done(CGF);
2059}
2060
// Emits a 'masked' region: the body runs iff __kmpc_masked returns non-zero
// for the (optional) filter thread; with no filter, thread 0 is used.
2062                                          const RegionCodeGenTy &MaskedOpGen,
2063                                          SourceLocation Loc, const Expr *Filter) {
2064  if (!CGF.HaveInsertPoint())
2065    return;
2066  // if(__kmpc_masked(ident_t *, gtid, filter)) {
2067  //   MaskedOpGen();
2068  //   __kmpc_end_masked(iden_t *, gtid);
2069  // }
2070  // Prepare arguments and build a call to __kmpc_masked
2071  llvm::Value *FilterVal = Filter
2072                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2073                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2074  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2075                         FilterVal};
2076  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2077                            getThreadID(CGF, Loc)};
2078  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2079                            CGM.getModule(), OMPRTL___kmpc_masked),
2080                        Args,
2081                        OMPBuilder.getOrCreateRuntimeFunction(
2082                            CGM.getModule(), OMPRTL___kmpc_end_masked),
2083                        ArgsEnd,
2084                        /*Conditional=*/true);
2085  MaskedOpGen.setAction(Action);
2086  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
    // Close the conditional region opened by the Action's Enter().
2087  Action.Done(CGF);
2088}
2089
// Emits a 'taskyield' point, either via the OpenMPIRBuilder or via a direct
// __kmpc_omp_taskyield call; untied tasks also get a resume switch.
2092  if (!CGF.HaveInsertPoint())
2093    return;
2094  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2095    OMPBuilder.createTaskyield(CGF.Builder);
2096  } else {
2097    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2098    llvm::Value *Args[] = {
2100        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2101    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2102                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2103                        Args);
2104  }
2105
    // A yield is a task scheduling point: untied tasks may resume here.
2106  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2107    Region->emitUntiedSwitch(CGF);
2108}
2109
// Emits a 'taskgroup' region bracketed by __kmpc_taskgroup and
// __kmpc_end_taskgroup.
2111                                             const RegionCodeGenTy &TaskgroupOpGen,
2113  if (!CGF.HaveInsertPoint())
2114    return;
2115  // __kmpc_taskgroup(ident_t *, gtid);
2116  // TaskgroupOpGen();
2117  // __kmpc_end_taskgroup(ident_t *, gtid);
2118  // Prepare arguments and build a call to __kmpc_taskgroup
2119  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2120  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2121                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
2122                        Args,
2123                        OMPBuilder.getOrCreateRuntimeFunction(
2124                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2125                        Args);
2126  TaskgroupOpGen.setAction(Action);
2127  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2128}
2129
2130/// Given an array of pointers to variables, project the address of a
2131/// given variable.
// NOTE(review): the signature's first line is missing from this excerpt; from
// the body, \p Array is an array of void* slots and \p Index selects the slot
// holding the address of \p Var.
2133                                      unsigned Index, const VarDecl *Var) {
2134  // Pull out the pointer to the variable.
2135  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2136  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2137
    // Re-type the raw pointer to the variable's memory type, preserving the
    // source address space, and attach the declared alignment.
2138  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2139  return Address(
2140      CGF.Builder.CreateBitCast(
2141          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2142      ElemTy, CGF.getContext().getDeclAlign(Var));
2143}
2144
// Builds the helper "void copy_func(void *LHSArg, void *RHSArg)" used by
// __kmpc_copyprivate: it copies each copyprivate variable from the source
// array (single thread's copies) into the destination array (other threads).
2146    CodeGenModule &CGM, llvm::Type *ArgsElemType,
2147    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2148    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2150  ASTContext &C = CGM.getContext();
2151  // void copy_func(void *LHSArg, void *RHSArg);
2152  FunctionArgList Args;
2153  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2155  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2157  Args.push_back(&LHSArg);
2158  Args.push_back(&RHSArg);
2159  const auto &CGFI =
2160      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2161  std::string Name =
2162      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2163  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2164                                    llvm::GlobalValue::InternalLinkage, Name,
2165                                    &CGM.getModule());
2167  Fn->setDoesNotRecurse();
2168  CodeGenFunction CGF(CGM);
2169  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2170  // Dest = (void*[n])(LHSArg);
2171  // Src = (void*[n])(RHSArg);
2173      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2174      ArgsElemType->getPointerTo()),
2175      ArgsElemType, CGF.getPointerAlign());
2177      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2178      ArgsElemType->getPointerTo()),
2179      ArgsElemType, CGF.getPointerAlign());
2180  // *(Type0*)Dst[0] = *(Type0*)Src[0];
2181  // *(Type1*)Dst[1] = *(Type1*)Src[1];
2182  // ...
2183  // *(Typen*)Dst[n] = *(Typen*)Src[n];
    // Element-wise copy using the user-provided assignment ops (these can be
    // user-defined copy assignment operators, not just memcpy).
2184  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2185    const auto *DestVar =
2186        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2187    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2188
2189    const auto *SrcVar =
2190        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2191    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2192
2193    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2194    QualType Type = VD->getType();
2195    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2196  }
2197  CGF.FinishFunction();
2198  return Fn;
2199}
2200
// Emits a 'single' region: the body runs on exactly one thread (guarded by
// __kmpc_single/__kmpc_end_single); when copyprivate clauses are present,
// the executing thread's values are broadcast via __kmpc_copyprivate.
2202                                      const RegionCodeGenTy &SingleOpGen,
2204                                      ArrayRef<const Expr *> CopyprivateVars,
2205                                      ArrayRef<const Expr *> SrcExprs,
2206                                      ArrayRef<const Expr *> DstExprs,
2207                                      ArrayRef<const Expr *> AssignmentOps) {
2208  if (!CGF.HaveInsertPoint())
2209    return;
2210  assert(CopyprivateVars.size() == SrcExprs.size() &&
2211         CopyprivateVars.size() == DstExprs.size() &&
2212         CopyprivateVars.size() == AssignmentOps.size());
2214  // int32 did_it = 0;
2215  // if(__kmpc_single(ident_t *, gtid)) {
2216  //   SingleOpGen();
2217  //   __kmpc_end_single(ident_t *, gtid);
2218  //   did_it = 1;
2219  // }
2220  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2221  // <copy_func>, did_it);
2222
    // did_it records (per thread) whether this thread executed the body; the
    // runtime uses it to pick the source thread of the broadcast.
2223  Address DidIt = Address::invalid();
2224  if (!CopyprivateVars.empty()) {
2225    // int32 did_it = 0;
2226    QualType KmpInt32Ty =
2227        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2228    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2229    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2230  }
2231  // Prepare arguments and build a call to __kmpc_single
2232  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2233  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2234                            CGM.getModule(), OMPRTL___kmpc_single),
2235                        Args,
2236                        OMPBuilder.getOrCreateRuntimeFunction(
2237                            CGM.getModule(), OMPRTL___kmpc_end_single),
2238                        Args,
2239                        /*Conditional=*/true);
2240  SingleOpGen.setAction(Action);
2241  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2242  if (DidIt.isValid()) {
2243    // did_it = 1;
2244    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2245  }
2246  Action.Done(CGF);
2247  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2248  // <copy_func>, did_it);
2249  if (DidIt.isValid()) {
2250    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2251    QualType CopyprivateArrayTy = C.getConstantArrayType(
2252        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2253        /*IndexTypeQuals=*/0);
2254    // Create a list of all private variables for copyprivate.
2255    Address CopyprivateList =
2256        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2257    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2258      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2259      CGF.Builder.CreateStore(
2261              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2262              CGF.VoidPtrTy),
2263          Elem);
2264    }
2265    // Build function that copies private values from single region to all other
2266    // threads in the corresponding parallel region.
2267    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2268        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2269        SrcExprs, DstExprs, AssignmentOps, Loc);
2270    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2272        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2273    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2274    llvm::Value *Args[] = {
2275        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2276        getThreadID(CGF, Loc),        // i32 <gtid>
2277        BufSize,                      // size_t <buf_size>
2278        CL.emitRawPointer(CGF),       // void *<copyprivate list>
2279        CpyFn,                        // void (*) (void *, void *) <copy_func>
2280        DidItVal                      // i32 did_it
2281    };
2282    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2283                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
2284                        Args);
2285  }
2286}
2287
// Emits an 'ordered' region.  With 'threads' semantics the body is bracketed
// by __kmpc_ordered/__kmpc_end_ordered; otherwise it is emitted inline.
2289                                        const RegionCodeGenTy &OrderedOpGen,
2290                                        SourceLocation Loc, bool IsThreads) {
2291  if (!CGF.HaveInsertPoint())
2292    return;
2293  // __kmpc_ordered(ident_t *, gtid);
2294  // OrderedOpGen();
2295  // __kmpc_end_ordered(ident_t *, gtid);
2296  // Prepare arguments and build a call to __kmpc_ordered
2297  if (IsThreads) {
2298    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2299    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2300                              CGM.getModule(), OMPRTL___kmpc_ordered),
2301                          Args,
2302                          OMPBuilder.getOrCreateRuntimeFunction(
2303                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
2304                          Args);
2305    OrderedOpGen.setAction(Action);
2306    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2307    return;
2308  }
2309  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2310}
2311
2313 unsigned Flags;
2314 if (Kind == OMPD_for)
2315 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2316 else if (Kind == OMPD_sections)
2317 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2318 else if (Kind == OMPD_single)
2319 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2320 else if (Kind == OMPD_barrier)
2321 Flags = OMP_IDENT_BARRIER_EXPL;
2322 else
2323 Flags = OMP_IDENT_BARRIER_IMPL;
2324 return Flags;
2325}
2326
// Chooses the default schedule for a loop directive: doacross loops (ordered
// clause with a loop count) are forced to schedule(static, 1).
2328    CodeGenFunction &CGF, const OMPLoopDirective &S,
2329    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2330  // Check if the loop directive is actually a doacross loop directive. In this
2331  // case choose static, 1 schedule.
2332  if (llvm::any_of(
2333          S.getClausesOfKind<OMPOrderedClause>(),
2334          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2335    ScheduleKind = OMPC_SCHEDULE_static;
2336    // Chunk size is 1 in this case.
2337    llvm::APInt ChunkSize(32, 1);
2338    ChunkExpr = IntegerLiteral::Create(
2339        CGF.getContext(), ChunkSize,
2340        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2341        SourceLocation());
2342  }
2343}
2344
// Emits a barrier: via the OpenMPIRBuilder when enabled; otherwise a
// __kmpc_barrier call, or __kmpc_cancel_barrier (plus a cancellation-exit
// check) inside cancellable regions.
2346                                       OpenMPDirectiveKind Kind, bool EmitChecks,
2347                                       bool ForceSimpleCall) {
2348  // Check if we should use the OMPBuilder
2349  auto *OMPRegionInfo =
2350      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2351  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2352    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2353        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2354    return;
2355  }
2356
2357  if (!CGF.HaveInsertPoint())
2358    return;
2359  // Build call __kmpc_cancel_barrier(loc, thread_id);
2360  // Build call __kmpc_barrier(loc, thread_id);
2361  unsigned Flags = getDefaultFlagsForBarriers(Kind);
2362  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2363  // thread_id);
2364  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2365                         getThreadID(CGF, Loc)};
2366  if (OMPRegionInfo) {
2367    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2368      llvm::Value *Result = CGF.EmitRuntimeCall(
2369          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2370                                                OMPRTL___kmpc_cancel_barrier),
2371          Args);
2372      if (EmitChecks) {
2373        // if (__kmpc_cancel_barrier()) {
2374        //   exit from construct;
2375        // }
2376        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2377        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2378        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2379        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2380        CGF.EmitBlock(ExitBB);
2381        //   exit from construct;
        // Branch through cleanups to the construct's cancellation destination.
2382        CodeGenFunction::JumpDest CancelDestination =
2383            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2384        CGF.EmitBranchThroughCleanup(CancelDestination);
2385        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2386      }
2387      return;
2388    }
2389  }
2390  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2391                          CGM.getModule(), OMPRTL___kmpc_barrier),
2392                      Args);
2393}
2394
// Emits a call to __kmpc_error for the 'error' directive; severity 2 is
// fatal, 1 is a warning, and a null message pointer means no message clause.
2396                                    Expr *ME, bool IsFatal) {
2397  llvm::Value *MVL =
2398      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2399         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2400  // Build call void __kmpc_error(ident_t *loc, int severity, const char
2401  // *message)
2402  llvm::Value *Args[] = {
2403      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2404      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2405      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2406  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2407                          CGM.getModule(), OMPRTL___kmpc_error),
2408                      Args);
2409}
2410
2411/// Map the OpenMP loop schedule to the runtime enumeration.
// Selects the ordered ("OMP_ord_*") or unordered ("OMP_sch_*") runtime value
// and the chunked variant for static schedules.
2412static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2413                                          bool Chunked, bool Ordered) {
2414  switch (ScheduleKind) {
2415  case OMPC_SCHEDULE_static:
2416    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2417                   : (Ordered ? OMP_ord_static : OMP_sch_static);
2418  case OMPC_SCHEDULE_dynamic:
2419    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2420  case OMPC_SCHEDULE_guided:
2421    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2422  case OMPC_SCHEDULE_runtime:
2423    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2424  case OMPC_SCHEDULE_auto:
2425    return Ordered ? OMP_ord_auto : OMP_sch_auto;
    // NOTE(review): a case label (presumably the "unknown" schedule kind) is
    // missing from this excerpt before the following default handling.
2427    assert(!Chunked && "chunk was specified but schedule kind not known");
2428    return Ordered ? OMP_ord_static : OMP_sch_static;
2429  }
2430  llvm_unreachable("Unexpected runtime schedule");
2431}
2432
2433/// Map the OpenMP distribute schedule to the runtime enumeration.
2434static OpenMPSchedType
2436 // only static is allowed for dist_schedule
2437 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2438}
2439
2441 bool Chunked) const {
2442 OpenMPSchedType Schedule =
2443 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2444 return Schedule == OMP_sch_static;
2445}
2446
2448 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2449 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2450 return Schedule == OMP_dist_sch_static;
2451}
2452
2454 bool Chunked) const {
2455 OpenMPSchedType Schedule =
2456 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2457 return Schedule == OMP_sch_static_chunked;
2458}
2459
2461 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2462 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2463 return Schedule == OMP_dist_sch_static_chunked;
2464}
2465
2467 OpenMPSchedType Schedule =
2468 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2469 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2470 return Schedule != OMP_sch_static;
2471}
2472
// Ors the monotonic/nonmonotonic modifier bits (and the simd-balanced
// adjustment) into the runtime schedule value; applies the OpenMP 5.0
// default-modifier rule when no modifier was written.
2473static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2476  int Modifier = 0;
2477  switch (M1) {
2478  case OMPC_SCHEDULE_MODIFIER_monotonic:
2479    Modifier = OMP_sch_modifier_monotonic;
2480    break;
2481  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2482    Modifier = OMP_sch_modifier_nonmonotonic;
2483    break;
2484  case OMPC_SCHEDULE_MODIFIER_simd:
2485    if (Schedule == OMP_sch_static_chunked)
2486      Schedule = OMP_sch_static_balanced_chunked;
2487    break;
    // NOTE(review): the remaining case labels for this switch are missing
    // from this excerpt.
2490    break;
2491  }
2492  switch (M2) {
2493  case OMPC_SCHEDULE_MODIFIER_monotonic:
2494    Modifier = OMP_sch_modifier_monotonic;
2495    break;
2496  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2497    Modifier = OMP_sch_modifier_nonmonotonic;
2498    break;
2499  case OMPC_SCHEDULE_MODIFIER_simd:
2500    if (Schedule == OMP_sch_static_chunked)
2501      Schedule = OMP_sch_static_balanced_chunked;
2502    break;
    // NOTE(review): the remaining case labels for this switch are missing
    // from this excerpt.
2505    break;
2506  }
2507  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2508  // If the static schedule kind is specified or if the ordered clause is
2509  // specified, and if the nonmonotonic modifier is not specified, the effect is
2510  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2511  // modifier is specified, the effect is as if the nonmonotonic modifier is
2512  // specified.
2513  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2514    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2515          Schedule == OMP_sch_static_balanced_chunked ||
2516          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2517          Schedule == OMP_dist_sch_static_chunked ||
2518          Schedule == OMP_dist_sch_static))
2519      Modifier = OMP_sch_modifier_nonmonotonic;
2520  }
2521  return Schedule | Modifier;
2522}
2523
// Emits the __kmpc_dispatch_init_* call that starts dynamic/guided/runtime
// loop scheduling for the given bounds, stride and chunk.
2526    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2527    bool Ordered, const DispatchRTInput &DispatchValues) {
2528  if (!CGF.HaveInsertPoint())
2529    return;
2530  OpenMPSchedType Schedule = getRuntimeSchedule(
2531      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
    // Static schedules must not reach the dynamic dispatch path (unless the
    // loop is ordered, which always dispatches).
2532  assert(Ordered ||
2533         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2534          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2535          Schedule != OMP_sch_static_balanced_chunked));
2536  // Call __kmpc_dispatch_init(
2537  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2538  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
2539  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
2540
2541  // If the Chunk was not specified in the clause - use default value 1.
2542  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2543                                            : CGF.Builder.getIntN(IVSize, 1);
2544  llvm::Value *Args[] = {
2545      emitUpdateLocation(CGF, Loc),
2546      getThreadID(CGF, Loc),
2547      CGF.Builder.getInt32(addMonoNonMonoModifier(
2548          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2549      DispatchValues.LB,                                     // Lower
2550      DispatchValues.UB,                                     // Upper
2551      CGF.Builder.getIntN(IVSize, 1),                        // Stride
2552      Chunk                                                  // Chunk
2553  };
2554  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2555                      Args);
2556}
2557
// Emits the __kmpc_dispatch_deinit call that tears down dynamic dispatch
// state for the current thread.
// NOTE(review): the signature lines are missing from this excerpt.
2560  if (!CGF.HaveInsertPoint())
2561    return;
2562  // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2563  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2564  CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2565}
2566
// Shared helper that builds the argument list for and emits a
// __kmpc_for_static_init_* call for any static (worksharing or distribute)
// schedule.
2568    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2569    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2571    const CGOpenMPRuntime::StaticRTInput &Values) {
2572  if (!CGF.HaveInsertPoint())
2573    return;
2574
2575  assert(!Values.Ordered);
2576  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2577         Schedule == OMP_sch_static_balanced_chunked ||
2578         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2579         Schedule == OMP_dist_sch_static ||
2580         Schedule == OMP_dist_sch_static_chunked);
2581
2582  // Call __kmpc_for_static_init(
2583  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2584  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2585  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2586  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
2587  llvm::Value *Chunk = Values.Chunk;
2588  if (Chunk == nullptr) {
2589    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2590            Schedule == OMP_dist_sch_static) &&
2591           "expected static non-chunked schedule");
2592    // If the Chunk was not specified in the clause - use default value 1.
2593    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2594  } else {
2595    assert((Schedule == OMP_sch_static_chunked ||
2596            Schedule == OMP_sch_static_balanced_chunked ||
2597            Schedule == OMP_ord_static_chunked ||
2598            Schedule == OMP_dist_sch_static_chunked) &&
2599           "expected static chunked schedule");
2600  }
2601  llvm::Value *Args[] = {
2602      UpdateLocation,
2603      ThreadId,
2604      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2605                                                  M2)), // Schedule type
2606      Values.IL.emitRawPointer(CGF),                    // &isLastIter
2607      Values.LB.emitRawPointer(CGF),                    // &LB
2608      Values.UB.emitRawPointer(CGF),                    // &UB
2609      Values.ST.emitRawPointer(CGF),                    // &Stride
2610      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
2611      Chunk                                             // Chunk
2612  };
2613  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2614}
2615
// Emits the static-init call for a worksharing loop or sections construct:
// resolves the runtime schedule kind, then delegates to
// emitForStaticInitCall with the plain (non-GPU) init function.
// NOTE(review): the signature line(s) and the condition feeding the
// OMP_IDENT_WORK_LOOP / OMP_IDENT_WORK_SECTIONS selection are elided in
// this extracted listing.
 2618                                         OpenMPDirectiveKind DKind,
 2619                                         const OpenMPScheduleTy &ScheduleKind,
 2620                                         const StaticRTInput &Values) {
 2621   OpenMPSchedType ScheduleNum = getRuntimeSchedule(
 2622       ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
 2623   assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
 2624          "Expected loop-based or sections-based directive.");
 2625   llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
 2627                                                         ? OMP_IDENT_WORK_LOOP
 2628                                                         : OMP_IDENT_WORK_SECTIONS);
 2629   llvm::Value *ThreadId = getThreadID(CGF, Loc);
 2630   llvm::FunctionCallee StaticInitFunction =
 2631       OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
 2632                                              false);
 2634   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
 2635                         ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
 2636 }
2637
// Emits the static-init call for a 'distribute' construct. On GPU targets
// (AMDGCN/NVPTX device compilation) a dedicated distribute init function is
// requested from the OpenMPIRBuilder.
// NOTE(review): the signature line(s) and the final argument line of the
// emitForStaticInitCall invocation are elided in this extracted listing.
 2641     const CGOpenMPRuntime::StaticRTInput &Values) {
 2642   OpenMPSchedType ScheduleNum =
 2643       getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
 2644   llvm::Value *UpdatedLocation =
 2645       emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
 2646   llvm::Value *ThreadId = getThreadID(CGF, Loc);
 2647   llvm::FunctionCallee StaticInitFunction;
 2648   bool isGPUDistribute =
 2649       CGM.getLangOpts().OpenMPIsTargetDevice &&
 2650       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
 2651   StaticInitFunction = OMPBuilder.createForStaticInitFunction(
 2652       Values.IVSize, Values.IVSigned, isGPUDistribute);
 2653
 2654   emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
 2655                         ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
 2657 }
2658
// Emits the matching "static fini" runtime call that closes a statically
// scheduled worksharing/distribute region. GPU device compilations of
// distribute directives use __kmpc_distribute_static_fini; everything else
// uses __kmpc_for_static_fini.
// NOTE(review): the signature line(s) and parts of the ident-flag selection
// (lines 2669-2670, 2677 of the original) are elided in this extracted
// listing; the OMPD_target_teams_loop check below therefore looks wider
// than the assert — confirm against the full source.
 2661                                           OpenMPDirectiveKind DKind) {
 2662   assert((DKind == OMPD_distribute || DKind == OMPD_for ||
 2663           DKind == OMPD_sections) &&
 2664          "Expected distribute, for, or sections directive kind");
 2665   if (!CGF.HaveInsertPoint())
 2666     return;
 2667   // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
 2668   llvm::Value *Args[] = {
 2671                              (DKind == OMPD_target_teams_loop)
 2672                                  ? OMP_IDENT_WORK_DISTRIBUTE
 2673                                  : isOpenMPLoopDirective(DKind)
 2674                                        ? OMP_IDENT_WORK_LOOP
 2675                                        : OMP_IDENT_WORK_SECTIONS),
 2676       getThreadID(CGF, Loc)};
 2678   if (isOpenMPDistributeDirective(DKind) &&
 2679       CGM.getLangOpts().OpenMPIsTargetDevice &&
 2680       (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
 2681     CGF.EmitRuntimeCall(
 2682         OMPBuilder.getOrCreateRuntimeFunction(
 2683             CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
 2684         Args);
 2685   else
 2686     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2687                             CGM.getModule(), OMPRTL___kmpc_for_static_fini),
 2688                         Args);
 2689 }
2690
// Emits the per-iteration "dispatch fini" call used by ordered dynamic
// loops; the concrete callee is selected by induction-variable width and
// signedness. NOTE(review): the signature line is elided in this listing.
 2693                                                  unsigned IVSize,
 2694                                                  bool IVSigned) {
 2695   if (!CGF.HaveInsertPoint())
 2696     return;
 2697   // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
 2698   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
 2699   CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
 2700                       Args);
 2701 }
2702
// Emits __kmpc_dispatch_next_* and converts its kmp_int32 result to an i1
// "more work available" flag. NOTE(review): the signature line and the
// first two call arguments (location/thread id, original line 2713) are
// elided in this extracted listing.
 2704                                           SourceLocation Loc, unsigned IVSize,
 2705                                           bool IVSigned, Address IL,
 2706                                           Address LB, Address UB,
 2707                                           Address ST) {
 2708   // Call __kmpc_dispatch_next(
 2709   //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
 2710   //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
 2711   //          kmp_int[32|64] *p_stride);
 2712   llvm::Value *Args[] = {
 2714       IL.emitRawPointer(CGF), // &isLastIter
 2715       LB.emitRawPointer(CGF), // &Lower
 2716       UB.emitRawPointer(CGF), // &Upper
 2717       ST.emitRawPointer(CGF)  // &Stride
 2718   };
 2719   llvm::Value *Call = CGF.EmitRuntimeCall(
 2720       OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
 2721   return CGF.EmitScalarConversion(
 2722       Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
 2723       CGF.getContext().BoolTy, Loc);
 2724 }
2725
// Emits __kmpc_push_num_threads ahead of a parallel region; NumThreads is
// truncated/extended to i32 as the runtime expects. NOTE(review): the
// signature line and the first argument line are elided in this listing.
 2727                                          llvm::Value *NumThreads,
 2729   if (!CGF.HaveInsertPoint())
 2730     return;
 2731   // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
 2732   llvm::Value *Args[] = {
 2734       CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
 2735   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2736                           CGM.getModule(), OMPRTL___kmpc_push_num_threads),
 2737                       Args);
 2738 }
2739
// Emits __kmpc_push_proc_bind to record the proc_bind policy for the next
// parallel region. NOTE(review): the signature line and the first argument
// line are elided in this listing.
 2741                                        ProcBindKind ProcBind,
 2743   if (!CGF.HaveInsertPoint())
 2744     return;
 2745   assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
 2746   // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
 2747   llvm::Value *Args[] = {
 2749       llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
 2750   CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2751                           CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
 2752                       Args);
 2753 }
2754
// Emits an OpenMP flush: via OpenMPIRBuilder when -fopenmp-enable-irbuilder
// is active, otherwise as a direct __kmpc_flush runtime call. The AO
// (atomic ordering) parameter is accepted but not consulted in the visible
// code. NOTE(review): the signature line is elided in this listing.
 2756                                 SourceLocation Loc, llvm::AtomicOrdering AO) {
 2757   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
 2758     OMPBuilder.createFlush(CGF.Builder);
 2759   } else {
 2760     if (!CGF.HaveInsertPoint())
 2761       return;
 2762     // Build call void __kmpc_flush(ident_t *loc)
 2763     CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
 2764                             CGM.getModule(), OMPRTL___kmpc_flush),
 2765                         emitUpdateLocation(CGF, Loc));
 2766   }
 2767 }
2768
 2769namespace {
 2770/// Indexes of fields for type kmp_task_t.
 2771///
 2772/// NOTE: enumerator order is load-bearing — field accesses elsewhere in this
 2773/// file use std::next(field_begin(), <enumerator>), so these indices must
 2774/// stay in sync with the field order built by createKmpTaskTRecordDecl.
 2771enum KmpTaskTFields {
 2772  /// List of shared variables.
 2773  KmpTaskTShareds,
 2774  /// Task routine.
 2775  KmpTaskTRoutine,
 2776  /// Partition id for the untied tasks.
 2777  KmpTaskTPartId,
 2778  /// Function with call of destructors for private variables.
 2779  Data1,
 2780  /// Task priority.
 2781  Data2,
 2782  /// (Taskloops only) Lower bound.
 2783  KmpTaskTLowerBound,
 2784  /// (Taskloops only) Upper bound.
 2785  KmpTaskTUpperBound,
 2786  /// (Taskloops only) Stride.
 2787  KmpTaskTStride,
 2788  /// (Taskloops only) Is last iteration flag.
 2789  KmpTaskTLastIter,
 2790  /// (Taskloops only) Reduction data.
 2791  KmpTaskTReductions,
 2792};
 2793} // anonymous namespace
2794
// Delegates offload-entry metadata emission to the OpenMPIRBuilder, supplying
// a diagnostics callback that maps a TargetRegionEntryInfo (device/file id +
// line) back to a clang SourceLocation for error reporting.
// NOTE(review): the signature line and several interior lines (loop header
// tail, SourceLocation lookup, one diagnostic argument line) are elided in
// this extracted listing — confirm against the full source.
 2796   // If we are in simd mode or there are no entries, we don't need to do
 2797   // anything.
 2798   if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
 2799     return;
 2800
 2801   llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
 2802       [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
 2803              const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
 2805     if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
 2806       for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
 2808            I != E; ++I) {
 2809         if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
 2810             I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
 2812               I->getFirst(), EntryInfo.Line, 1);
 2813           break;
 2814         }
 2815       }
 2816     }
 2817     switch (Kind) {
 2818     case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
 2819       unsigned DiagID = CGM.getDiags().getCustomDiagID(
 2820           DiagnosticsEngine::Error, "Offloading entry for target region in "
 2821                                     "%0 is incorrect: either the "
 2822                                     "address or the ID is invalid.");
 2823       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
 2824     } break;
 2825     case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
 2826       unsigned DiagID = CGM.getDiags().getCustomDiagID(
 2827           DiagnosticsEngine::Error, "Offloading entry for declare target "
 2828                                     "variable %0 is incorrect: the "
 2829                                     "address is invalid.");
 2830       CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
 2831     } break;
 2832     case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
 2833       unsigned DiagID = CGM.getDiags().getCustomDiagID(
 2835           "Offloading entry for declare target variable is incorrect: the "
 2836           "address is invalid.");
 2837       CGM.getDiags().Report(DiagID);
 2838     } break;
 2839     }
 2840   };
 2841
 2842   OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
 2843 }
2844
// Lazily builds the kmp_routine_entry_t function-pointer type
// (kmp_int32 (*)(kmp_int32, void *)) and caches it in KmpRoutineEntryPtrQTy.
// NOTE(review): the signature line and a few interior lines (EPI setup,
// cached LLVM type assignment) are elided in this extracted listing.
 2846   if (!KmpRoutineEntryPtrTy) {
 2847     // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
 2849     QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
 2851     KmpRoutineEntryPtrQTy = C.getPointerType(
 2852         C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
 2854   }
 2855 }
2856
2857namespace {
2858struct PrivateHelpersTy {
2859 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2860 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2861 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2862 PrivateElemInit(PrivateElemInit) {}
2863 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2864 const Expr *OriginalRef = nullptr;
2865 const VarDecl *Original = nullptr;
2866 const VarDecl *PrivateCopy = nullptr;
2867 const VarDecl *PrivateElemInit = nullptr;
2868 bool isLocalPrivate() const {
2869 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2870 }
2871};
2872typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2873} // anonymous namespace
2874
2875static bool isAllocatableDecl(const VarDecl *VD) {
2876 const VarDecl *CVD = VD->getCanonicalDecl();
2877 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2878 return false;
2879 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2880 // Use the default allocation.
2881 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2882 !AA->getAllocator());
2883}
2884
// Builds the implicit record ".kmp_privates.t" holding one field per private
// variable of a task; returns nullptr when there are no privates. AlignedAttr
// attributes from the original variable are propagated to the field.
// NOTE(review): two interior lines are elided in this extracted listing —
// the declaration of 'Type' (original line 2896) and of 'FD' (line 2905);
// confirm against the full source.
 2885 static RecordDecl *
 2887   if (!Privates.empty()) {
 2888     ASTContext &C = CGM.getContext();
 2889     // Build struct .kmp_privates_t. {
 2890     //         /*  private vars  */
 2891     //       };
 2892     RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
 2893     RD->startDefinition();
 2894     for (const auto &Pair : Privates) {
 2895       const VarDecl *VD = Pair.second.Original;
 2897       // If the private variable is a local variable with lvalue ref type,
 2898       // allocate the pointer instead of the pointee type.
 2899       if (Pair.second.isLocalPrivate()) {
 2900         if (VD->getType()->isLValueReferenceType())
 2901           Type = C.getPointerType(Type);
 2902         if (isAllocatableDecl(VD))
 2903           Type = C.getPointerType(Type);
 2904       }
 2906       if (VD->hasAttrs()) {
 2907         for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
 2908              E(VD->getAttrs().end());
 2909              I != E; ++I)
 2910           FD->addAttr(*I);
 2911       }
 2912     }
 2913     RD->completeDefinition();
 2914     return RD;
 2915   }
 2916   return nullptr;
 2917 }
2918
// Builds the implicit kmp_task_t record (and the kmp_cmplrdata_t union it
// embeds). Fields are appended in exactly the order of the KmpTaskTFields
// enumerators; taskloop directives get five extra fields (lb/ub/st/liter/
// reductions). NOTE(review): the parameter line naming CGM/Kind (original
// line 2920) is elided in this extracted listing.
 2919 static RecordDecl *
 2921                          QualType KmpInt32Ty,
 2922                          QualType KmpRoutineEntryPointerQTy) {
 2923   ASTContext &C = CGM.getContext();
 2924   // Build struct kmp_task_t {
 2925   //         void *              shareds;
 2926   //         kmp_routine_entry_t routine;
 2927   //         kmp_int32           part_id;
 2928   //         kmp_cmplrdata_t data1;
 2929   //         kmp_cmplrdata_t data2;
 2930   // For taskloops additional fields:
 2931   //         kmp_uint64          lb;
 2932   //         kmp_uint64          ub;
 2933   //         kmp_int64           st;
 2934   //         kmp_int32           liter;
 2935   //         void *              reductions;
 2936   //       };
 2937   RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
 2938   UD->startDefinition();
 2939   addFieldToRecordDecl(C, UD, KmpInt32Ty);
 2940   addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
 2941   UD->completeDefinition();
 2942   QualType KmpCmplrdataTy = C.getRecordType(UD);
 2943   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
 2944   RD->startDefinition();
 2945   addFieldToRecordDecl(C, RD, C.VoidPtrTy);
 2946   addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
 2947   addFieldToRecordDecl(C, RD, KmpInt32Ty);
 2948   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
 2949   addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
 2950   if (isOpenMPTaskLoopDirective(Kind)) {
 2951     QualType KmpUInt64Ty =
 2952         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
 2953     QualType KmpInt64Ty =
 2954         CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
 2955     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
 2956     addFieldToRecordDecl(C, RD, KmpUInt64Ty);
 2957     addFieldToRecordDecl(C, RD, KmpInt64Ty);
 2958     addFieldToRecordDecl(C, RD, KmpInt32Ty);
 2959     addFieldToRecordDecl(C, RD, C.VoidPtrTy);
 2960   }
 2961   RD->completeDefinition();
 2962   return RD;
 2963 }
2964
// Builds kmp_task_t_with_privates: the kmp_task_t header followed, when any
// privates exist, by the .kmp_privates.t record. NOTE(review): the parameter
// line naming CGM/KmpTaskTQTy (original line 2966) is elided in this listing.
 2965 static RecordDecl *
 2967                                      ArrayRef<PrivateDataTy> Privates) {
 2968   ASTContext &C = CGM.getContext();
 2969   // Build struct kmp_task_t_with_privates {
 2970   //         kmp_task_t task_data;
 2971   //         .kmp_privates_t. privates;
 2972   //       };
 2973   RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
 2974   RD->startDefinition();
 2975   addFieldToRecordDecl(C, RD, KmpTaskTQTy);
 2976   if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
 2977     addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
 2978   RD->completeDefinition();
 2979   return RD;
 2980 }
2981
 2982/// Emit a proxy function which accepts kmp_task_t as the second
 2983/// argument.
 2984/// \code
 2985/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
 2986/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
 2987/// For taskloops:
 2988/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
 2989/// tt->reductions, tt->shareds);
 2990/// return 0;
 2991/// }
 2992/// \endcode
// NOTE(review): the static function signature line(s) and a few interior
// lines (parameter-kind arguments, a pointer cast line around original line
// 3060) are elided in this extracted listing — confirm against the full
// source before editing.
 2993 static llvm::Function *
 2995                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
 2996                       QualType KmpTaskTWithPrivatesPtrQTy,
 2997                       QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
 2998                       QualType SharedsPtrTy, llvm::Function *TaskFunction,
 2999                       llvm::Value *TaskPrivatesMap) {
 3000   ASTContext &C = CGM.getContext();
 3001   FunctionArgList Args;
 3002   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
 3004   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3005                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
 3007   Args.push_back(&GtidArg);
 3008   Args.push_back(&TaskTypeArg);
 3009   const auto &TaskEntryFnInfo =
 3010       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
 3011   llvm::FunctionType *TaskEntryTy =
 3012       CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
 3013   std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
 3014   auto *TaskEntry = llvm::Function::Create(
 3015       TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
 3016   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
 3017   TaskEntry->setDoesNotRecurse();
 3018   CodeGenFunction CGF(CGM);
 3019   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
 3020                     Loc, Loc);
 3021
 3022   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
 3023   // tt,
 3024   // For taskloops:
 3025   // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
 3026   // tt->task_data.shareds);
 3027   llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
 3028       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
 3029   LValue TDBase = CGF.EmitLoadOfPointerLValue(
 3030       CGF.GetAddrOfLocalVar(&TaskTypeArg),
 3031       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
 3032   const auto *KmpTaskTWithPrivatesQTyRD =
 3033       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
 3034   LValue Base =
 3035       CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
 3036   const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
 3037   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
 3038   LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
 3039   llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
 3040
 3041   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
 3042   LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
 3043   llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
 3044       CGF.EmitLoadOfScalar(SharedsLVal, Loc),
 3045       CGF.ConvertTypeForMem(SharedsPtrTy));
 3046
 3047   auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
 3048   llvm::Value *PrivatesParam;
 3049   if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
 3050     LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
 3051     PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
 3052         PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
 3053   } else {
 3054     // No privates record — pass a null pointer to the task function.
 3054     PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
 3055   }
 3056
 3057   llvm::Value *CommonArgs[] = {
 3058       GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
 3059       CGF.Builder
 3061                                  CGF.VoidPtrTy, CGF.Int8Ty)
 3062           .emitRawPointer(CGF)};
 3063   SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
 3064                                           std::end(CommonArgs));
 3065   if (isOpenMPTaskLoopDirective(Kind)) {
 3066     auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
 3067     LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
 3068     llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
 3069     auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
 3070     LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
 3071     llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
 3072     auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
 3073     LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
 3074     llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
 3075     auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
 3076     LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
 3077     llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
 3078     auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
 3079     LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
 3080     llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
 3081     CallArgs.push_back(LBParam);
 3082     CallArgs.push_back(UBParam);
 3083     CallArgs.push_back(StParam);
 3084     CallArgs.push_back(LIParam);
 3085     CallArgs.push_back(RParam);
 3086   }
 3087   CallArgs.push_back(SharedsParam);
 3088
 3089   CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
 3090                                                   CallArgs);
 3091   CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
 3092                              CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
 3093   CGF.FinishFunction();
 3094   return TaskEntry;
 3095 }
3096
// Emits the internal ".omp_task_destructor." helper: it walks the fields of
// the task's privates record and pushes a destroy cleanup for every field
// whose type reports a non-trivial destruction kind.
// NOTE(review): the function signature line(s) and a few interior lines
// (parameter-kind arguments, the LValue 'Base' declaration at original line
// 3127) are elided in this extracted listing.
 3099                         QualType KmpInt32Ty,
 3100                         QualType KmpTaskTWithPrivatesPtrQTy,
 3101                         QualType KmpTaskTWithPrivatesQTy) {
 3102   ASTContext &C = CGM.getContext();
 3103   FunctionArgList Args;
 3104   ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
 3106   ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3107                                 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
 3109   Args.push_back(&GtidArg);
 3110   Args.push_back(&TaskTypeArg);
 3111   const auto &DestructorFnInfo =
 3112       CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
 3113   llvm::FunctionType *DestructorFnTy =
 3114       CGM.getTypes().GetFunctionType(DestructorFnInfo);
 3115   std::string Name =
 3116       CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
 3117   auto *DestructorFn =
 3118       llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
 3119                              Name, &CGM.getModule());
 3120   CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
 3121                                     DestructorFnInfo);
 3122   DestructorFn->setDoesNotRecurse();
 3123   CodeGenFunction CGF(CGM);
 3124   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
 3125                     Args, Loc, Loc);
 3126
 3128       CGF.GetAddrOfLocalVar(&TaskTypeArg),
 3129       KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
 3130   const auto *KmpTaskTWithPrivatesQTyRD =
 3131       cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
 3132   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
 3133   Base = CGF.EmitLValueForField(Base, *FI);
 3134   for (const auto *Field :
 3135        cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
 3136     if (QualType::DestructionKind DtorKind =
 3137             Field->getType().isDestructedType()) {
 3138       LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
 3139       CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
 3140     }
 3141   }
 3142   CGF.FinishFunction();
 3143   return DestructorFn;
 3144 }
3145
 3146/// Emit a privates mapping function for correct handling of private and
 3147/// firstprivate variables.
 3148/// \code
 3149/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
 3150/// **noalias priv1,...,  <tyn> **noalias privn) {
 3151///   *priv1 = &.privates.priv1;
 3152///   ...;
 3153///   *privn = &.privates.privn;
 3154/// }
 3155/// \endcode
// NOTE(review): the static function signature line and several interior
// lines (parameter-kind arguments, the 'Base' LValue declaration around
// original line 3236) are elided in this extracted listing.
 3156 static llvm::Value *
 3158                                const OMPTaskDataTy &Data, QualType PrivatesQTy,
 3159                                ArrayRef<PrivateDataTy> Privates) {
 3160   ASTContext &C = CGM.getContext();
 3161   FunctionArgList Args;
 3162   ImplicitParamDecl TaskPrivatesArg(
 3163       C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3164       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
 3166   Args.push_back(&TaskPrivatesArg);
 3167   llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
 3168   unsigned Counter = 1;
 3169   // One out-parameter per private/firstprivate/lastprivate/local variable;
 3170   // PrivateVarsPos remembers each variable's argument index.
 3169   for (const Expr *E : Data.PrivateVars) {
 3170     Args.push_back(ImplicitParamDecl::Create(
 3171         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3172         C.getPointerType(C.getPointerType(E->getType()))
 3173             .withConst()
 3174             .withRestrict(),
 3176     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
 3177     PrivateVarsPos[VD] = Counter;
 3178     ++Counter;
 3179   }
 3180   for (const Expr *E : Data.FirstprivateVars) {
 3181     Args.push_back(ImplicitParamDecl::Create(
 3182         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3183         C.getPointerType(C.getPointerType(E->getType()))
 3184             .withConst()
 3185             .withRestrict(),
 3187     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
 3188     PrivateVarsPos[VD] = Counter;
 3189     ++Counter;
 3190   }
 3191   for (const Expr *E : Data.LastprivateVars) {
 3192     Args.push_back(ImplicitParamDecl::Create(
 3193         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3194         C.getPointerType(C.getPointerType(E->getType()))
 3195             .withConst()
 3196             .withRestrict(),
 3198     const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
 3199     PrivateVarsPos[VD] = Counter;
 3200     ++Counter;
 3201   }
 3202   for (const VarDecl *VD : Data.PrivateLocals) {
 3204     if (VD->getType()->isLValueReferenceType())
 3205       Ty = C.getPointerType(Ty);
 3206     if (isAllocatableDecl(VD))
 3207       Ty = C.getPointerType(Ty);
 3208     Args.push_back(ImplicitParamDecl::Create(
 3209         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
 3210         C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
 3212     PrivateVarsPos[VD] = Counter;
 3213     ++Counter;
 3214   }
 3215   const auto &TaskPrivatesMapFnInfo =
 3216       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
 3217   llvm::FunctionType *TaskPrivatesMapTy =
 3218       CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
 3219   std::string Name =
 3220       CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
 3221   auto *TaskPrivatesMap = llvm::Function::Create(
 3222       TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
 3223       &CGM.getModule());
 3224   CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
 3225                                     TaskPrivatesMapFnInfo);
 3226   if (CGM.getLangOpts().Optimize) {
 3227     TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
 3228     TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
 3229     TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
 3230   }
 3231   CodeGenFunction CGF(CGM);
 3232   CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
 3233                     TaskPrivatesMapFnInfo, Args, Loc, Loc);
 3234
 3235   // *privi = &.privates.privi;
 3237       CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
 3238       TaskPrivatesArg.getType()->castAs<PointerType>());
 3239   const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
 3240   Counter = 0;
 3241   for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
 3242     LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
 3243     const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
 3244     LValue RefLVal =
 3245         CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
 3246     LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
 3247         RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
 3248     CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
 3249     ++Counter;
 3250   }
 3251   CGF.FinishFunction();
 3252   return TaskPrivatesMap;
 3253 }
3254
 3255/// Emit initialization for private variables in task-based directives.
// Walks the Privates list in parallel with the fields of the privates record
// and emits each copy's initializer: memcpy or element-wise construction for
// firstprivate arrays, EmitExprAsInit otherwise. Local privates are skipped.
// When ForDup is true this runs inside the task_dup helper.
// NOTE(review): the function signature line(s) and two interior lines
// (the pointer-element cast at original line 3281, the element-loop entry at
// line 3342) are elided in this extracted listing.
 3258                              Address KmpTaskSharedsPtr, LValue TDBase,
 3259                              const RecordDecl *KmpTaskTWithPrivatesQTyRD,
 3260                              QualType SharedsTy, QualType SharedsPtrTy,
 3261                              const OMPTaskDataTy &Data,
 3262                              ArrayRef<PrivateDataTy> Privates, bool ForDup) {
 3263   ASTContext &C = CGF.getContext();
 3264   auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
 3265   LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
 3266   OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
 3267                                  ? OMPD_taskloop
 3268                                  : OMPD_task;
 3269   const CapturedStmt &CS = *D.getCapturedStmt(Kind);
 3270   CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
 3271   LValue SrcBase;
 3272   bool IsTargetTask =
 3273       isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
 3274       isOpenMPTargetExecutionDirective(D.getDirectiveKind());
 3275   // For target-based directives skip 4 firstprivate arrays BasePointersArray,
 3276   // PointersArray, SizesArray, and MappersArray. The original variables for
 3277   // these arrays are not captured and we get their addresses explicitly.
 3278   if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
 3279       (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
 3280     SrcBase = CGF.MakeAddrLValue(
 3282             KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
 3283             CGF.ConvertTypeForMem(SharedsTy)),
 3284         SharedsTy);
 3285   }
 3286   FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
 3287   for (const PrivateDataTy &Pair : Privates) {
 3288     // Do not initialize private locals.
 3289     if (Pair.second.isLocalPrivate()) {
 3290       ++FI;
 3291       continue;
 3292     }
 3293     const VarDecl *VD = Pair.second.PrivateCopy;
 3294     const Expr *Init = VD->getAnyInitializer();
 3295     if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
 3296                              !CGF.isTrivialInitializer(Init)))) {
 3297       LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
 3298       if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
 3299         const VarDecl *OriginalVD = Pair.second.Original;
 3300         // Check if the variable is the target-based BasePointersArray,
 3301         // PointersArray, SizesArray, or MappersArray.
 3302         LValue SharedRefLValue;
 3303         QualType Type = PrivateLValue.getType();
 3304         const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
 3305         if (IsTargetTask && !SharedField) {
 3306           assert(isa<ImplicitParamDecl>(OriginalVD) &&
 3307                  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
 3308                  cast<CapturedDecl>(OriginalVD->getDeclContext())
 3309                          ->getNumParams() == 0 &&
 3310                  isa<TranslationUnitDecl>(
 3311                      cast<CapturedDecl>(OriginalVD->getDeclContext())
 3312                          ->getDeclContext()) &&
 3313                  "Expected artificial target data variable.");
 3314           SharedRefLValue =
 3315               CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
 3316         } else if (ForDup) {
 3317           SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
 3318           SharedRefLValue = CGF.MakeAddrLValue(
 3319               SharedRefLValue.getAddress().withAlignment(
 3320                   C.getDeclAlign(OriginalVD)),
 3321               SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
 3322               SharedRefLValue.getTBAAInfo());
 3323         } else if (CGF.LambdaCaptureFields.count(
 3324                        Pair.second.Original->getCanonicalDecl()) > 0 ||
 3325                    isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
 3326           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
 3327         } else {
 3328           // Processing for implicitly captured variables.
 3329           InlinedOpenMPRegionRAII Region(
 3330               CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
 3331               /*HasCancel=*/false, /*NoInheritance=*/true);
 3332           SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
 3333         }
 3334         if (Type->isArrayType()) {
 3335           // Initialize firstprivate array.
 3336           if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
 3337             // Perform simple memcpy.
 3338             CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
 3339           } else {
 3340             // Initialize firstprivate array using element-by-element
 3341             // initialization.
 3343                 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
 3344                 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
 3345                                                   Address SrcElement) {
 3346                   // Clean up any temporaries needed by the initialization.
 3347                   CodeGenFunction::OMPPrivateScope InitScope(CGF);
 3348                   InitScope.addPrivate(Elem, SrcElement);
 3349                   (void)InitScope.Privatize();
 3350                   // Emit initialization for single element.
 3351                   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
 3352                       CGF, &CapturesInfo);
 3353                   CGF.EmitAnyExprToMem(Init, DestElement,
 3354                                        Init->getType().getQualifiers(),
 3355                                        /*IsInitializer=*/false);
 3356                 });
 3357           }
 3358         } else {
 3359           CodeGenFunction::OMPPrivateScope InitScope(CGF);
 3360           InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
 3361           (void)InitScope.Privatize();
 3362           CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
 3363           CGF.EmitExprAsInit(Init, VD, PrivateLValue,
 3364                              /*capturedByInit=*/false);
 3365         }
 3366       } else {
 3367         CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
 3368       }
 3369     }
 3370     ++FI;
 3371   }
 3372 }
3373
 3374/// Check if duplication function is required for taskloops.
// Returns true as soon as any non-local private copy has a non-trivial
// CXXConstructExpr initializer. NOTE(review): the signature line and the
// tail of the InitRequired expression (original line 3384, presumably the
// !isTrivialInitializer term) are elided in this extracted listing.
 3376                                 ArrayRef<PrivateDataTy> Privates) {
 3377   bool InitRequired = false;
 3378   for (const PrivateDataTy &Pair : Privates) {
 3379     if (Pair.second.isLocalPrivate())
 3380       continue;
 3381     const VarDecl *VD = Pair.second.PrivateCopy;
 3382     const Expr *Init = VD->getAnyInitializer();
 3383     InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
 3385     if (InitRequired)
 3386       break;
 3387   }
 3388   return InitRequired;
 3389 }
3390
3391
3392/// Emit task_dup function (for initialization of
3393/// private/firstprivate/lastprivate vars and last_iter flag)
3394/// \code
3395/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3396/// lastpriv) {
3397/// // setup lastprivate flag
3398/// task_dst->last = lastpriv;
3399/// // could be constructor calls here...
3400/// }
3401/// \endcode
3402static llvm::Value *
3405 QualType KmpTaskTWithPrivatesPtrQTy,
3406 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3407 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3408 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3409 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3410 ASTContext &C = CGM.getContext();
3411 FunctionArgList Args;
3412 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3413 KmpTaskTWithPrivatesPtrQTy,
3415 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3416 KmpTaskTWithPrivatesPtrQTy,
3418 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3420 Args.push_back(&DstArg);
3421 Args.push_back(&SrcArg);
3422 Args.push_back(&LastprivArg);
3423 const auto &TaskDupFnInfo =
3424 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3425 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3426 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3427 auto *TaskDup = llvm::Function::Create(
3428 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3429 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3430 TaskDup->setDoesNotRecurse();
3431 CodeGenFunction CGF(CGM);
3432 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3433 Loc);
3434
3435 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3436 CGF.GetAddrOfLocalVar(&DstArg),
3437 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3438 // task_dst->liter = lastpriv;
3439 if (WithLastIter) {
3440 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3442 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3443 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3444 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3445 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3446 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3447 }
3448
3449 // Emit initial values for private copies (if any).
3450 assert(!Privates.empty());
3451 Address KmpTaskSharedsPtr = Address::invalid();
3452 if (!Data.FirstprivateVars.empty()) {
3453 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3454 CGF.GetAddrOfLocalVar(&SrcArg),
3455 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3457 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3458 KmpTaskSharedsPtr = Address(
3460 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3461 KmpTaskTShareds)),
3462 Loc),
3463 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3464 }
3465 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3466 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3467 CGF.FinishFunction();
3468 return TaskDup;
3469}
3470
3471/// Checks if destructor function is required to be generated.
3472/// \return true if cleanups are required, false otherwise.
3473static bool
3474checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3475 ArrayRef<PrivateDataTy> Privates) {
3476 for (const PrivateDataTy &P : Privates) {
3477 if (P.second.isLocalPrivate())
3478 continue;
3479 QualType Ty = P.second.Original->getType().getNonReferenceType();
3480 if (Ty.isDestructedType())
3481 return true;
3482 }
3483 return false;
3484}
3485
3486 namespace {
3487 /// Loop generator for OpenMP iterator expression.
// On construction this emits, for each iterator in the expression, the loop
// header "counter = 0; cont: if (counter < upper) goto body;" and leaves the
// insertion point inside the innermost body. The destructor emits the counter
// update, the back-branch to cont, and the exit block for each level, in
// reverse order. It is an OMPPrivateScope so the iterator variables and their
// counters are privatized for the duration of the scope.
3488 class OMPIteratorGeneratorScope final
3489 : public CodeGenFunction::OMPPrivateScope {
3490 CodeGenFunction &CGF;
3491 const OMPIteratorExpr *E = nullptr;
// NOTE(review): upstream lines 3492-3493 are missing from this capture; they
// declared the containers used below (Uppers, ContDests, ExitDests).
3494 OMPIteratorGeneratorScope() = delete;
3495 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3496 
3497 public:
3498 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3499 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3500 if (!E)
3501 return;
// NOTE(review): upstream line 3502 is missing from this capture.
3503 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3504 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3505 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
// Private copy of the iterator variable itself.
3506 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3507 const OMPIteratorHelperData &HelperData = E->getHelper(I);
// Private copy of the helper's loop counter.
3508 addPrivate(
3509 HelperData.CounterVD,
3510 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3511 }
3512 Privatize();
3513 
3514 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3515 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3516 LValue CLVal =
3517 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3518 HelperData.CounterVD->getType());
3519 // Counter = 0;
// NOTE(review): upstream line 3520 (the store-call head whose arguments are
// the next two lines, presumably CGF.EmitStoreOfScalar) is missing from this
// capture — confirm against upstream.
3521 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3522 CLVal);
3523 CodeGenFunction::JumpDest &ContDest =
3524 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3525 CodeGenFunction::JumpDest &ExitDest =
3526 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3527 // N = <number-of_iterations>;
3528 llvm::Value *N = Uppers[I];
3529 // cont:
3530 // if (Counter < N) goto body; else goto exit;
3531 CGF.EmitBlock(ContDest.getBlock());
3532 auto *CVal =
3533 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3534 llvm::Value *Cmp =
// NOTE(review): upstream line 3535 (the signedness condition selecting
// between the signed and unsigned compare below) is missing from this
// capture.
3536 ? CGF.Builder.CreateICmpSLT(CVal, N)
3537 : CGF.Builder.CreateICmpULT(CVal, N);
3538 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3539 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3540 // body:
3541 CGF.EmitBlock(BodyBB);
3542 // Iteri = Begini + Counter * Stepi;
3543 CGF.EmitIgnoredExpr(HelperData.Update);
3544 }
3545 }
// Closes the loop nest opened by the constructor, innermost level first.
3546 ~OMPIteratorGeneratorScope() {
3547 if (!E)
3548 return;
3549 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3550 // Counter = Counter + 1;
3551 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3552 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3553 // goto cont;
3554 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3555 // exit:
3556 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3557 }
3558 }
3559 };
3560 } // namespace
3561
// Returns the address of the data described by expression \p E together with
// its size in bytes: full shaped size for an array-shaping expression,
// (one-past-upper - lower) for an array section, sizeof(type) otherwise.
3562 static std::pair<llvm::Value *, llvm::Value *>
// NOTE(review): upstream line 3563 (the function name and parameter list —
// callers in this file use it as getPointerAndSize(CGF, E)) is missing from
// this capture.
3564 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3565 llvm::Value *Addr;
3566 if (OASE) {
3567 const Expr *Base = OASE->getBase();
3568 Addr = CGF.EmitScalarExpr(Base);
3569 } else {
3570 Addr = CGF.EmitLValue(E).getPointer(CGF);
3571 }
3572 llvm::Value *SizeVal;
3573 QualType Ty = E->getType();
3574 if (OASE) {
// Shaped array: element size times every dimension extent, each extent
// converted to size_t first.
3575 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3576 for (const Expr *SE : OASE->getDimensions()) {
3577 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3578 Sz = CGF.EmitScalarConversion(
3579 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3580 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3581 }
3582 } else if (const auto *ASE =
3583 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
// Array section: size = ptrtoint(one past upper-bound element) -
// ptrtoint(lower-bound address).
3584 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3585 Address UpAddrAddress = UpAddrLVal.getAddress();
3586 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3587 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3588 /*Idx0=*/1);
3589 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3590 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3591 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3592 } else {
3593 SizeVal = CGF.getTypeSize(Ty);
3594 }
3595 return std::make_pair(Addr, SizeVal);
3596 }
3597
3598/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3599static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3600 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3601 if (KmpTaskAffinityInfoTy.isNull()) {
3602 RecordDecl *KmpAffinityInfoRD =
3603 C.buildImplicitRecord("kmp_task_affinity_info_t");
3604 KmpAffinityInfoRD->startDefinition();
3605 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3606 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3607 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3608 KmpAffinityInfoRD->completeDefinition();
3609 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3610 }
3611}
3612
// NOTE(review): this capture is missing several upstream lines, including the
// function signature at lines 3613-3615; from the body (builds kmp_task_t,
// calls __kmpc_omp_task_alloc and fills a Result with NewTask/TaskEntry/...)
// this is the task-initialization helper returning a TaskResultTy — confirm
// against upstream. Gaps are flagged inline; the block is not compilable
// as captured.
3616 llvm::Function *TaskFunction, QualType SharedsTy,
3617 Address Shareds, const OMPTaskDataTy &Data) {
// NOTE(review): upstream lines 3618-3619 are missing; they declared locals
// used below (at least `C` and the `Privates` vector).
3620 // Aggregate privates and sort them by the alignment.
3621 const auto *I = Data.PrivateCopies.begin();
3622 for (const Expr *E : Data.PrivateVars) {
3623 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3624 Privates.emplace_back(
3625 C.getDeclAlign(VD),
3626 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3627 /*PrivateElemInit=*/nullptr));
3628 ++I;
3629 }
// Firstprivates additionally record the init expression's variable.
3630 I = Data.FirstprivateCopies.begin();
3631 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3632 for (const Expr *E : Data.FirstprivateVars) {
3633 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3634 Privates.emplace_back(
3635 C.getDeclAlign(VD),
3636 PrivateHelpersTy(
3637 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3638 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3639 ++I;
3640 ++IElemInitRef;
3641 }
3642 I = Data.LastprivateCopies.begin();
3643 for (const Expr *E : Data.LastprivateVars) {
3644 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3645 Privates.emplace_back(
3646 C.getDeclAlign(VD),
3647 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3648 /*PrivateElemInit=*/nullptr));
3649 ++I;
3650 }
3651 for (const VarDecl *VD : Data.PrivateLocals) {
3652 if (isAllocatableDecl(VD))
3653 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3654 else
3655 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3656 }
// Stable sort by decreasing alignment so the privates record packs tightly.
3657 llvm::stable_sort(Privates,
3658 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3659 return L.first > R.first;
3660 });
3661 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3662 // Build type kmp_routine_entry_t (if not built yet).
3663 emitKmpRoutineEntryT(KmpInt32Ty);
3664 // Build type kmp_task_t (if not built yet).
3665 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
// NOTE(review): upstream lines 3666-3667 are missing (the taskloop-variant
// record creation whose argument list continues below).
3668 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3669 }
// NOTE(review): upstream line 3670 is missing.
3671 } else {
3672 assert((D.getDirectiveKind() == OMPD_task ||
3673 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3674 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3675 "Expected taskloop, task or target directive");
3676 if (SavedKmpTaskTQTy.isNull()) {
// NOTE(review): upstream line 3677 is missing (head of the record-creation
// call whose arguments continue below).
3678 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3679 }
// NOTE(review): upstream line 3680 is missing.
3681 }
3682 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3683 // Build particular struct kmp_task_t for the given task.
3684 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
// NOTE(review): upstream line 3685 is missing (the initializer of
// KmpTaskTWithPrivatesQTyRD).
3686 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3687 QualType KmpTaskTWithPrivatesPtrQTy =
3688 C.getPointerType(KmpTaskTWithPrivatesQTy);
3689 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3690 llvm::Type *KmpTaskTWithPrivatesPtrTy =
3691 KmpTaskTWithPrivatesTy->getPointerTo();
3692 llvm::Value *KmpTaskTWithPrivatesTySize =
3693 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3694 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3695 
3696 // Emit initial values for private copies (if any).
3697 llvm::Value *TaskPrivatesMap = nullptr;
// The 4th parameter of the outlined task function is the privates-map
// callback; match its LLVM type.
3698 llvm::Type *TaskPrivatesMapTy =
3699 std::next(TaskFunction->arg_begin(), 3)->getType();
3700 if (!Privates.empty()) {
3701 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3702 TaskPrivatesMap =
3703 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3704 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3705 TaskPrivatesMap, TaskPrivatesMapTy);
3706 } else {
3707 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3708 cast<llvm::PointerType>(TaskPrivatesMapTy));
3709 }
3710 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3711 // kmp_task_t *tt);
3712 llvm::Function *TaskEntry = emitProxyTaskFunction(
3713 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3714 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3715 TaskPrivatesMap);
3716 
3717 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3718 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3719 // kmp_routine_entry_t *task_entry);
3720 // Task flags. Format is taken from
3721 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3722 // description of kmp_tasking_flags struct.
3723 enum {
3724 TiedFlag = 0x1,
3725 FinalFlag = 0x2,
3726 DestructorsFlag = 0x8,
3727 PriorityFlag = 0x20,
3728 DetachableFlag = 0x40,
3729 };
3730 unsigned Flags = Data.Tied ? TiedFlag : 0;
3731 bool NeedsCleanup = false;
3732 if (!Privates.empty()) {
3733 NeedsCleanup =
3734 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3735 if (NeedsCleanup)
3736 Flags = Flags | DestructorsFlag;
3737 }
3738 if (Data.Priority.getInt())
3739 Flags = Flags | PriorityFlag;
3740 if (D.hasClausesOfKind<OMPDetachClause>())
3741 Flags = Flags | DetachableFlag;
// final(expr) is a runtime select when the condition is a runtime value,
// otherwise it folds to a constant flag.
3742 llvm::Value *TaskFlags =
3743 Data.Final.getPointer()
3744 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3745 CGF.Builder.getInt32(FinalFlag),
3746 CGF.Builder.getInt32(/*C=*/0))
3747 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3748 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3749 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
// NOTE(review): upstream lines 3750 and 3752 are missing (the head of the
// AllocArgs array initializer and one mid-list line); the two lines below
// are its remaining arguments.
3751 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3753 TaskEntry, KmpRoutineEntryPtrTy)};
// Allocate the task object: device-aware allocator for nowait target tasks,
// plain __kmpc_omp_task_alloc otherwise.
3754 llvm::Value *NewTask;
3755 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3756 // Check if we have any device clause associated with the directive.
3757 const Expr *Device = nullptr;
3758 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3759 Device = C->getDevice();
3760 // Emit device ID if any otherwise use default value.
3761 llvm::Value *DeviceID;
3762 if (Device)
3763 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3764 CGF.Int64Ty, /*isSigned=*/true);
3765 else
3766 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3767 AllocArgs.push_back(DeviceID);
3768 NewTask = CGF.EmitRuntimeCall(
3769 OMPBuilder.getOrCreateRuntimeFunction(
3770 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3771 AllocArgs);
3772 } else {
3773 NewTask =
3774 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3775 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3776 AllocArgs);
3777 }
3778 // Emit detach clause initialization.
3779 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3780 // task_descriptor);
3781 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3782 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3783 LValue EvtLVal = CGF.EmitLValue(Evt);
3784 
3785 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3786 // int gtid, kmp_task_t *task);
3787 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3788 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3789 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3790 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3791 OMPBuilder.getOrCreateRuntimeFunction(
3792 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3793 {Loc, Tid, NewTask});
3794 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3795 Evt->getExprLoc());
3796 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3797 }
3798 // Process affinity clauses.
3799 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3800 // Process list of affinity data.
// NOTE(review): upstream line 3801 is missing.
3802 Address AffinitiesArray = Address::invalid();
3803 // Calculate number of elements to form the array of affinity data.
3804 llvm::Value *NumOfElements = nullptr;
3805 unsigned NumAffinities = 0;
3806 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
// Iterator-modified clauses contribute a runtime count (product of the
// iterator upper bounds); plain clauses a compile-time count.
3807 if (const Expr *Modifier = C->getModifier()) {
3808 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3809 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3810 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3811 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3812 NumOfElements =
3813 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3814 }
3815 } else {
3816 NumAffinities += C->varlist_size();
3817 }
3818 }
// NOTE(review): upstream line 3819 is missing.
3820 // Fields ids in kmp_task_affinity_info record.
3821 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3822 
3823 QualType KmpTaskAffinityInfoArrayTy;
// A runtime element count forces a VLA; otherwise a constant array suffices.
3824 if (NumOfElements) {
3825 NumOfElements = CGF.Builder.CreateNUWAdd(
3826 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3827 auto *OVE = new (C) OpaqueValueExpr(
3828 Loc,
3829 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3830 VK_PRValue);
3831 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3832 RValue::get(NumOfElements));
3833 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
// NOTE(review): upstream line 3834 is missing (mid-argument-list of
// getVariableArrayType).
3835 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
3836 // Properly emit variable-sized array.
3837 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
// NOTE(review): upstream line 3838 is missing (tail of the Create call).
3839 CGF.EmitVarDecl(*PD);
3840 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3841 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3842 /*isSigned=*/false);
3843 } else {
3844 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
// NOTE(review): upstream line 3845 is missing (first argument of
// getConstantArrayType).
3846 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3847 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3848 AffinitiesArray =
3849 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3850 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3851 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3852 /*isSigned=*/false);
3853 }
3854 
3855 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3856 // Fill array by elements without iterators.
3857 unsigned Pos = 0;
3858 bool HasIterator = false;
3859 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3860 if (C->getModifier()) {
3861 HasIterator = true;
3862 continue;
3863 }
3864 for (const Expr *E : C->varlists()) {
3865 llvm::Value *Addr;
3866 llvm::Value *Size;
3867 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3868 LValue Base =
3869 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
// NOTE(review): upstream line 3870 is missing (tail of MakeAddrLValue).
3871 // affs[i].base_addr = &<Affinities[i].second>;
3872 LValue BaseAddrLVal = CGF.EmitLValueForField(
3873 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3874 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3875 BaseAddrLVal);
3876 // affs[i].len = sizeof(<Affinities[i].second>);
3877 LValue LenLVal = CGF.EmitLValueForField(
3878 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3879 CGF.EmitStoreOfScalar(Size, LenLVal);
3880 ++Pos;
3881 }
3882 }
// Runtime position counter is only needed when iterator clauses will append
// a runtime-determined number of entries.
3883 LValue PosLVal;
3884 if (HasIterator) {
3885 PosLVal = CGF.MakeAddrLValue(
3886 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3887 C.getSizeType());
3888 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3889 }
3890 // Process elements with iterators.
3891 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3892 const Expr *Modifier = C->getModifier();
3893 if (!Modifier)
3894 continue;
3895 OMPIteratorGeneratorScope IteratorScope(
3896 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3897 for (const Expr *E : C->varlists()) {
3898 llvm::Value *Addr;
3899 llvm::Value *Size;
3900 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3901 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3902 LValue Base =
3903 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
// NOTE(review): upstream line 3904 is missing (tail of MakeAddrLValue).
3905 // affs[i].base_addr = &<Affinities[i].second>;
3906 LValue BaseAddrLVal = CGF.EmitLValueForField(
3907 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3908 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3909 BaseAddrLVal);
3910 // affs[i].len = sizeof(<Affinities[i].second>);
3911 LValue LenLVal = CGF.EmitLValueForField(
3912 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3913 CGF.EmitStoreOfScalar(Size, LenLVal);
3914 Idx = CGF.Builder.CreateNUWAdd(
3915 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3916 CGF.EmitStoreOfScalar(Idx, PosLVal);
3917 }
3918 }
3919 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3920 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3921 // naffins, kmp_task_affinity_info_t *affin_list);
3922 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3923 llvm::Value *GTid = getThreadID(CGF, Loc);
3924 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3925 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3926 // FIXME: Emit the function and ignore its result for now unless the
3927 // runtime function is properly implemented.
3928 (void)CGF.EmitRuntimeCall(
3929 OMPBuilder.getOrCreateRuntimeFunction(
3930 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3931 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3932 }
3933 llvm::Value *NewTaskNewTaskTTy =
// NOTE(review): upstream line 3934 is missing (the cast-call head producing
// NewTaskNewTaskTTy; its arguments continue below).
3935 NewTask, KmpTaskTWithPrivatesPtrTy);
3936 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3937 KmpTaskTWithPrivatesQTy);
3938 LValue TDBase =
3939 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3940 // Fill the data in the resulting kmp_task_t record.
3941 // Copy shareds if there are any.
3942 Address KmpTaskSharedsPtr = Address::invalid();
3943 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3944 KmpTaskSharedsPtr = Address(
3945 CGF.EmitLoadOfScalar(
// NOTE(review): upstream line 3946 is missing (mid-argument line of this
// load from the shareds field).
3947 TDBase,
3948 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3949 Loc),
3950 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3951 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3952 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3953 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3954 }
3955 // Emit initial values for private copies (if any).
// NOTE(review): upstream line 3956 is missing (the declaration of `Result`,
// assigned below).
3957 if (!Privates.empty()) {
3958 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3959 SharedsTy, SharedsPtrTy, Data, Privates,
3960 /*ForDup=*/false);
// Taskloops with lastprivates or privates needing init also need the task
// duplication callback.
3961 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3962 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3963 Result.TaskDupFn = emitTaskDupFunction(
3964 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3965 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3966 /*WithLastIter=*/!Data.LastprivateVars.empty());
3967 }
3968 }
3969 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3970 enum { Priority = 0, Destructors = 1 };
3971 // Provide pointer to function with destructors for privates.
3972 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3973 const RecordDecl *KmpCmplrdataUD =
3974 (*FI)->getType()->getAsUnionType()->getDecl();
3975 if (NeedsCleanup) {
3976 llvm::Value *DestructorFn = emitDestructorsFunction(
3977 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3978 KmpTaskTWithPrivatesQTy);
3979 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3980 LValue DestructorsLV = CGF.EmitLValueForField(
3981 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
// NOTE(review): upstream line 3982 is missing (the store-call head whose
// arguments are the next two lines).
3983 DestructorFn, KmpRoutineEntryPtrTy),
3984 DestructorsLV);
3985 }
3986 // Set priority.
3987 if (Data.Priority.getInt()) {
3988 LValue Data2LV = CGF.EmitLValueForField(
3989 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
3990 LValue PriorityLV = CGF.EmitLValueForField(
3991 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
3992 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
3993 }
3994 Result.NewTask = NewTask;
3995 Result.TaskEntry = TaskEntry;
3996 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3997 Result.TDBase = TDBase;
3998 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3999 return Result;
4000 }
4001
4002 /// Translates internal dependency kind into the runtime kind.
// NOTE(review): upstream line 4003 (the function signature; callers pass an
// OpenMP dependence clause kind and receive an RTLDependenceKindTy) is
// missing from this capture.
4004 RTLDependenceKindTy DepKind;
4005 switch (K) {
4006 case OMPC_DEPEND_in:
4007 DepKind = RTLDependenceKindTy::DepIn;
4008 break;
4009 // Out and InOut dependencies must use the same code.
4010 case OMPC_DEPEND_out:
4011 case OMPC_DEPEND_inout:
4012 DepKind = RTLDependenceKindTy::DepInOut;
4013 break;
4014 case OMPC_DEPEND_mutexinoutset:
4015 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4016 break;
4017 case OMPC_DEPEND_inoutset:
4018 DepKind = RTLDependenceKindTy::DepInOutSet;
4019 break;
4020 case OMPC_DEPEND_outallmemory:
4021 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4022 break;
// These kinds never reach this translation; they are handled elsewhere or
// are invalid here.
4023 case OMPC_DEPEND_source:
4024 case OMPC_DEPEND_sink:
4025 case OMPC_DEPEND_depobj:
4026 case OMPC_DEPEND_inoutallmemory:
// NOTE(review): upstream line 4027 (one more case label of this group) is
// missing from this capture.
4028 llvm_unreachable("Unknown task dependence type");
4029 }
4030 return DepKind;
4031 }
4032
4033/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4034static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4035 QualType &FlagsTy) {
4036 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4037 if (KmpDependInfoTy.isNull()) {
4038 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4039 KmpDependInfoRD->startDefinition();
4040 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4041 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4042 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4043 KmpDependInfoRD->completeDefinition();
4044 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4045 }
4046}
4047
// Returns the number of dependencies stored in a depobj together with an
// lvalue for the first kmp_depend_info element. The count is read from the
// base_addr field of the sentinel record stored just before element 0.
4048 std::pair<llvm::Value *, LValue>
// NOTE(review): upstream lines 4049-4051 (the member-function name/parameters
// — callers use getDepobjElements(CGF, DepobjLVal, Loc) — and leading locals
// such as `C`) are missing from this capture.
4052 QualType FlagsTy;
4053 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4054 RecordDecl *KmpDependInfoRD =
4055 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4056 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
// NOTE(review): upstream line 4057 is missing (the statement head producing
// `Base` from the load below — presumably CGF.EmitLoadOfPointerLValue(...);
// confirm against upstream).
4058 DepobjLVal.getAddress().withElementType(
4059 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4060 KmpDependInfoPtrTy->castAs<PointerType>());
// Step one element backwards: the record at index -1 holds the count.
4061 Address DepObjAddr = CGF.Builder.CreateGEP(
4062 CGF, Base.getAddress(),
4063 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4064 LValue NumDepsBase = CGF.MakeAddrLValue(
4065 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4066 // NumDeps = deps[i].base_addr;
4067 LValue BaseAddrLVal = CGF.EmitLValueForField(
4068 NumDepsBase,
4069 *std::next(KmpDependInfoRD->field_begin(),
4070 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4071 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4072 return std::make_pair(NumDeps, Base);
4073 }
4074
// Emits one kmp_depend_info record (base_addr, len, flags) per dependency
// expression into DependenciesArray. The write position is either a
// compile-time counter (unsigned*) or, for iterator-expanded dependencies,
// a runtime counter lvalue that is incremented after each store.
4075 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4076 llvm::PointerUnion<unsigned *, LValue *> Pos,
// NOTE(review): upstream line 4077 is missing; it declared the `Data`
// parameter used throughout the body (a dependency-clause data record).
4078 Address DependenciesArray) {
4079 CodeGenModule &CGM = CGF.CGM;
4080 ASTContext &C = CGM.getContext();
4081 QualType FlagsTy;
4082 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4083 RecordDecl *KmpDependInfoRD =
4084 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4085 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4086 
// If the clause uses an iterator modifier, generate the iterator loop nest
// around the stores below.
4087 OMPIteratorGeneratorScope IteratorScope(
4088 CGF, cast_or_null<OMPIteratorExpr>(
4089 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4090 : nullptr));
4091 for (const Expr *E : Data.DepExprs) {
4092 llvm::Value *Addr;
4093 llvm::Value *Size;
4094 
4095 // The expression will be a nullptr in the 'omp_all_memory' case.
4096 if (E) {
4097 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4098 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4099 } else {
4100 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4101 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4102 }
4103 LValue Base;
4104 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4105 Base = CGF.MakeAddrLValue(
4106 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4107 } else {
4108 assert(E && "Expected a non-null expression");
4109 LValue &PosLVal = *Pos.get<LValue *>();
4110 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4111 Base = CGF.MakeAddrLValue(
4112 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4113 }
4114 // deps[i].base_addr = &<Dependencies[i].second>;
4115 LValue BaseAddrLVal = CGF.EmitLValueForField(
4116 Base,
4117 *std::next(KmpDependInfoRD->field_begin(),
4118 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4119 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4120 // deps[i].len = sizeof(<Dependencies[i].second>);
4121 LValue LenLVal = CGF.EmitLValueForField(
4122 Base, *std::next(KmpDependInfoRD->field_begin(),
4123 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4124 CGF.EmitStoreOfScalar(Size, LenLVal);
4125 // deps[i].flags = <Dependencies[i].first>;
4126 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4127 LValue FlagsLVal = CGF.EmitLValueForField(
4128 Base,
4129 *std::next(KmpDependInfoRD->field_begin(),
4130 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
// NOTE(review): upstream line 4131 is missing (the store-call head whose
// arguments are the next two lines).
4132 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4133 FlagsLVal);
// Advance the position: bump the compile-time counter, or load/increment/
// store the runtime counter.
4134 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4135 ++(*P);
4136 } else {
4137 LValue &PosLVal = *Pos.get<LValue *>();
4138 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4139 Idx = CGF.Builder.CreateNUWAdd(Idx,
4140 llvm::ConstantInt::get(Idx->getType(), 1));
4141 CGF.EmitStoreOfScalar(Idx, PosLVal);
4142 }
4143 }
4144 }
4145
// Computes, for each depobj expression in Data.DepExprs, the number of
// dependency records it holds (accumulated into a temporary inside the
// iterator scope, then loaded out) and returns the per-expression sizes.
// NOTE(review): upstream line 4146 (the function's return type and name) is
// missing from this capture.
4147 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
// NOTE(review): upstream line 4148 is missing; it declared the `Data`
// parameter (depobj dependency-clause data) checked by the assert below.
4149 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4150 "Expected depobj dependency kind.");
// NOTE(review): upstream line 4151 is missing; it declared the `Sizes`
// result container returned at the end.
4152 SmallVector<LValue, 4> SizeLVals;
4153 ASTContext &C = CGF.getContext();
4154 {
4155 OMPIteratorGeneratorScope IteratorScope(
4156 CGF, cast_or_null<OMPIteratorExpr>(
4157 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4158 : nullptr));
4159 for (const Expr *E : Data.DepExprs) {
4160 llvm::Value *NumDeps;
4161 LValue Base;
4162 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4163 std::tie(NumDeps, Base) =
4164 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
// Accumulate the count in a stack temporary so it survives the iterator
// scope; it is read back after the scope closes.
4165 LValue NumLVal = CGF.MakeAddrLValue(
4166 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4167 C.getUIntPtrType());
4168 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4169 NumLVal.getAddress());
4170 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4171 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4172 CGF.EmitStoreOfScalar(Add, NumLVal);
4173 SizeLVals.push_back(NumLVal);
4174 }
4175 }
4176 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4177 llvm::Value *Size =
4178 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4179 Sizes.push_back(Size);
4180 }
4181 return Sizes;
4182 }
4183
// Copies the dependency records stored in each depobj of Data.DepExprs into
// DependenciesArray via memcpy, advancing the runtime position counter
// PosLVal by the number of records copied each time.
// NOTE(review): upstream line 4184 (the function's return type, name and
// first parameter) is missing from this capture.
4185 QualType &KmpDependInfoTy,
4186 LValue PosLVal,
// NOTE(review): upstream line 4187 is missing; it declared the `Data`
// parameter (depobj dependency-clause data) checked by the assert below.
4188 Address DependenciesArray) {
4189 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4190 "Expected depobj dependency kind.");
4191 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4192 {
4193 OMPIteratorGeneratorScope IteratorScope(
4194 CGF, cast_or_null<OMPIteratorExpr>(
4195 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4196 : nullptr));
4197 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4198 const Expr *E = Data.DepExprs[I];
4199 llvm::Value *NumDeps;
4200 LValue Base;
4201 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4202 std::tie(NumDeps, Base) =
4203 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4204 
4205 // memcopy dependency data.
// Byte count = element size * number of records (count widened to size_t).
4206 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4207 ElSize,
4208 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4209 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4210 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4211 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4212 
4213 // Increase pos.
4214 // pos += size;
4215 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4216 CGF.EmitStoreOfScalar(Add, PosLVal);
4217 }
4218 }
4219 }
4220
/// Emits the dependency array for OpenMP 'depend' clauses and returns the
/// pair {number of dependency records, address of the kmp_depend_info array},
/// ready to be passed to the task runtime entry points.
/// Returns {nullptr, invalid address} when no dependency expressions exist.
/// NOTE(review): the parameter-list lines of this signature are not visible in
/// this rendering — presumably (CGF, Dependencies, Loc); confirm against the
/// original source.
4221std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
 // Fast path: every depend clause has an empty expression list -> nothing to
 // emit at all.
4224 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4225 return D.DepExprs.empty();
4226 }))
4227 return std::make_pair(nullptr, Address::invalid());
4228 // Process list of dependencies.
 // NOTE(review): a line declaring 'C' (presumably ASTContext &C =
 // CGM.getContext()) is elided in this rendering — it is referenced below.
4230 Address DependenciesArray = Address::invalid();
4231 llvm::Value *NumOfElements = nullptr;
 // Compile-time count of "plain" dependencies: depobj deps and iterator-based
 // deps are excluded here because their element counts are only known at
 // runtime and are accumulated separately below.
4232 unsigned NumDependencies = std::accumulate(
4233 Dependencies.begin(), Dependencies.end(), 0,
4234 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4235 return D.DepKind == OMPC_DEPEND_depobj
4236 ? V
4237 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4238 });
4239 QualType FlagsTy;
 // Fetch (and lazily build) the kmp_depend_info record type and its flags
 // field type.
4240 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4241 bool HasDepobjDeps = false;
4242 bool HasRegularWithIterators = false;
 // Runtime accumulators (IntPtrTy) for the two dynamically-sized categories.
4243 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4244 llvm::Value *NumOfRegularWithIterators =
4245 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4246 // Calculate number of depobj dependencies and regular deps with the
4247 // iterators.
4248 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4249 if (D.DepKind == OMPC_DEPEND_depobj) {
 // NOTE(review): the declaration of 'Sizes' is elided in this rendering —
 // presumably the per-depobj element counts emitted just above this loop;
 // confirm against the original source.
4252 for (llvm::Value *Size : Sizes) {
4253 NumOfDepobjElements =
4254 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4255 }
4256 HasDepobjDeps = true;
4257 continue;
4258 }
4259 // Include number of iterations, if any.
4260
4261 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
 // Total iteration space of the iterator clause = product of the upper
 // bounds of every iterator it declares.
4262 llvm::Value *ClauseIteratorSpace =
4263 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4264 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4265 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4266 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4267 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4268 }
 // Each iteration contributes one record per dependency expression in
 // the clause.
4269 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4270 ClauseIteratorSpace,
4271 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4272 NumOfRegularWithIterators =
4273 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4274 HasRegularWithIterators = true;
4275 continue;
4276 }
4277 }
4278
4279 QualType KmpDependInfoArrayTy;
 // If any category is runtime-sized, the whole array must be a VLA; otherwise
 // a plain constant-sized array temp suffices.
4280 if (HasDepobjDeps || HasRegularWithIterators) {
 // Total = static count + runtime depobj elements + runtime iterator deps.
4281 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4282 /*isSigned=*/false);
4283 if (HasDepobjDeps) {
4284 NumOfElements =
4285 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4286 }
4287 if (HasRegularWithIterators) {
4288 NumOfElements =
4289 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4290 }
 // Wrap the runtime element count in an OpaqueValueExpr so the AST-level
 // variable-array type below can be sized by an already-emitted value.
4291 auto *OVE = new (C) OpaqueValueExpr(
4292 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4293 VK_PRValue);
4294 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4295 RValue::get(NumOfElements));
4296 KmpDependInfoArrayTy =
4297 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4298 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4299 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4300 // Properly emit variable-sized array.
 // NOTE(review): the remaining arguments of this Create() call are elided
 // in this rendering — confirm against the original source.
4301 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4303 CGF.EmitVarDecl(*PD);
4304 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
 // The runtime entry expects the element count as a 32-bit value.
4305 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4306 /*isSigned=*/false);
4307 } else {
 // Fully static case: constant array temp, count known at compile time.
4308 KmpDependInfoArrayTy = C.getConstantArrayType(
4309 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4310 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4311 DependenciesArray =
4312 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
 // Decay the array to a pointer to its first element.
4313 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4314 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4315 /*isSigned=*/false);
4316 }
 // Fill phase 1: plain dependencies (no depobj, no iterator) are written
 // first using a compile-time position counter.
4317 unsigned Pos = 0;
4318 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4319 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4320 Dependencies[I].IteratorExpr)
4321 continue;
4322 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4323 DependenciesArray);
4324 }
4325 // Copy regular dependencies with iterators.
 // Fill phase 2: iterator-based deps need a runtime position counter,
 // seeded with the final static Pos so they append after phase 1.
4326 LValue PosLVal = CGF.MakeAddrLValue(
4327 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4328 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4329 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4330 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4331 !Dependencies[I].IteratorExpr)
4332 continue;
4333 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4334 DependenciesArray);
4335 }
4336 // Copy final depobj arrays without iterators.
 // Fill phase 3: splice in the records stored inside each depobj object,
 // continuing from the same runtime counter.
4337 if (HasDepobjDeps) {
4338 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4339 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4340 continue;
4341 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4342 DependenciesArray);
4343 }
4344 }
 // Hand the array back as an i8* (void*) in the generic address space, the
 // form the runtime calls expect.
4345 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4346 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4347 return std::make_pair(NumOfElements, DependenciesArray);
4348}
4349
4351 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4353 if (Dependencies.DepExprs.empty())
4354 return Address::invalid();
4355 // Process list of dependencies.
4357 Address DependenciesArray = Address::invalid();
4358 unsigned NumDependencies = Dependencies.DepExprs.size();
4359 QualType FlagsTy;
4360 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4361 RecordDecl *KmpDependInfoRD =
4362 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4363
4364 llvm::Value *Size;
4365 // Define type kmp_depend_info[<Dependencies.size()>];
4366 // For depobj reserve one extra element to store the number of elements.
4367 // It is required to handle depobj(x) update(in) construct.
4368 // kmp_depend_info[<Dependencies.size()>] deps;
4369 llvm::Value *NumDepsVal;
4370 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4371 if (const auto *IE =
4372 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4373 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4374 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4375 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4376 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4377 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4378 }
4379 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4380 NumDepsVal);
4381 CharUnits SizeInBytes =
4382 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4383 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4384 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4385 NumDepsVal =
4386 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4387 } else {
4388 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4389 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4390 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4391 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4392 Size = CGM.getSize(Sz.alignTo(Align));
4393 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4394 }
4395 // Need to allocate on the dynamic memory.
4396 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4397 // Use default allocator.
4398 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4399 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4400
4401 llvm::Value *Addr =
4402 CGF.EmitRuntimeCall(