//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
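
  // Illustrative sketch (an assumption about the emitted control flow, not
  // verbatim IR): for an untied task, Enter() above produces a dispatcher of
  // the form
  //   switch (*part_id) {
  //   case 0: goto .untied.jmp.0;   // start of the first task part
  //   case 1: goto .untied.jmp.1;   // resume point added by emitUntiedSwitch
  //   default: goto .untied.done.;  // already-finished task
  //   }
  // Each emitUntiedSwitch() call stores the next case number into *part_id,
  // re-enqueues the task via UntiedCodeGen, and registers a new switch case.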
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
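
// Usage sketch (illustrative; the real call sites appear later in this file):
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical, HasCancel);
//     CGF.EmitStmt(Body); // emitted with the inlined-region CapturedStmtInfo
//   } // the destructor restores the previous CapturedStmtInfo here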

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerator elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
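
// Illustrative use (a sketch; the actual flag composition happens in the
// barrier-emission code later in this file, outside this excerpt): an
// implicit barrier at the end of a worksharing 'for' is described to the
// runtime as
//   unsigned Flags = OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR; // 0x42
// stored in the 'flags' field of the ident_t argument.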

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /* but currently used for storing
///                               region-specific ITT */
///                            /* contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
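
// Illustrative constant (a sketch, not verbatim IR): an ident_t for a
// construct in foo() at test.c:10:3 is laid out per the field indices above:
//   @loc = private constant %struct.ident_t
//            { i32 0, i32 2 /*OMP_IDENT_KMPC*/, i32 0, i32 0, ptr @.str }
// where @.str holds ";test.c;foo;10;3;;" (see
// getIdentStringFromSourceLocation below).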

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
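
// Illustrative encoding (a sketch based on the modifier bits above):
// 'schedule(nonmonotonic: dynamic, 4)' is communicated to the runtime as
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic  // 35 | (1 << 30)
// i.e. the modifier flags are OR'ed into the base schedule enumerator.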

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.first);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
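
// Shape of the emitted IR (a sketch of the loop this function builds):
//   entry: br (DestBegin == DestEnd), done, body
//   body:  phi for the current dest (and src) element; emit the element
//          init (plain or UDR); advance both pointers by one element;
//          br (next == DestEnd), done, body
//   done:  continue with the rest of the function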

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  Config.setDefaultTargetAS(
      CGM.getContext().getTargetInfo().getTargetAddressSpace(LangAS::Default));

  OMPBuilder.setConfig(Config);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(*CGM.getFileSystem(),
                                     CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
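
// Source-level intuition (a sketch; the exact helper names and separators are
// platform-specific via getName): for
//   #pragma omp declare reduction(my_add : int : omp_out += omp_in) \
//       initializer(omp_priv = 0)
// this emits internal helpers conceptually equivalent to
//   void .omp_combiner.(int *restrict omp_out, int *restrict omp_in) {
//     *omp_out += *omp_in;
//   }
//   void .omp_initializer.(int *restrict omp_priv, int *restrict omp_orig) {
//     *omp_priv = 0;
//   }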

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize, we split the block at
    // IP, use the new block (=BB) as the destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)), which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D);
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
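
// Example result (illustrative): for a construct at test.c:10:3 inside foo(),
// the returned ident string is ";test.c;foo;10;3;;".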

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used, we need to use it for all thread id calls,
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
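
// Emitted IR (a sketch): when neither a cached value nor a thread-id
// parameter is usable, the call materialized at the service insertion point
// looks like
//   %tid = call i32 @__kmpc_global_thread_num(ptr @loc)
// and is then reused for subsequent runtime calls in this function.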

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
    for (const auto *D : I->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(I);
  }
  if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
    break;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
    break;
  }
}

static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
    break;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

1556 ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1557 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
1558
1559 auto LinkageForVariable = [&VD, this]() {
1560 return CGM.getLLVMLinkageVarDefinition(VD);
1561 };
1562
1563 std::vector<llvm::GlobalVariable *> GeneratedRefs;
1564
1565 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
1566 CGM.getContext().getPointerType(VD->getType()));
1567 llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
1568 convertCaptureClause(VD), convertDeviceClause(VD),
1569 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
1570 VD->isExternallyVisible(),
1571 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
1572 VD->getCanonicalDecl()->getBeginLoc()),
1573 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
1574 CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
1575 LinkageForVariable);
1576
1577 if (!addr)
1578 return ConstantAddress::invalid();
1579 return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1580}
1581
1582llvm::Constant *
1583 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1584 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1585 !CGM.getContext().getTargetInfo().isTLSSupported());
1586 // Lookup the entry, lazily creating it if necessary.
1587 std::string Suffix = getName({"cache", ""});
1588 return OMPBuilder.getOrCreateInternalVariable(
1589 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1590}
1591
1592 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1593 const VarDecl *VD,
1594 Address VDAddr,
1595 SourceLocation Loc) {
1596 if (CGM.getLangOpts().OpenMPUseTLS &&
1597 CGM.getContext().getTargetInfo().isTLSSupported())
1598 return VDAddr;
1599
1600 llvm::Type *VarTy = VDAddr.getElementType();
1601 llvm::Value *Args[] = {
1602 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1603 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
1604 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1605 getOrCreateThreadPrivateCache(VD)};
1606 return Address(
1607 CGF.EmitRuntimeCall(
1608 OMPBuilder.getOrCreateRuntimeFunction(
1609 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1610 Args),
1611 CGF.Int8Ty, VDAddr.getAlignment());
1612}
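// For illustration (schematic): when native TLS is unavailable, an access to a
// threadprivate variable `x` becomes a cache lookup in the runtime, roughly
//   void *p = __kmpc_threadprivate_cached(&loc, gtid, (void *)&x,
//                                         sizeof(x), &cache);
// while with TLS support the original thread_local address is used directly.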
1613
1614 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1615 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1616 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1617 // Call kmp_int32 __kmpc_global_thread_num(&loc) to initialize the OpenMP
1618 // runtime library.
1619 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1620 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1621 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1622 OMPLoc);
1623 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1624 // to register constructor/destructor for variable.
1625 llvm::Value *Args[] = {
1626 OMPLoc,
1627 CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
1628 Ctor, CopyCtor, Dtor};
1629 CGF.EmitRuntimeCall(
1630 OMPBuilder.getOrCreateRuntimeFunction(
1631 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1632 Args);
1633}
1634
1635 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1636 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1637 bool PerformInit, CodeGenFunction *CGF) {
1638 if (CGM.getLangOpts().OpenMPUseTLS &&
1639 CGM.getContext().getTargetInfo().isTLSSupported())
1640 return nullptr;
1641
1642 VD = VD->getDefinition(CGM.getContext());
1643 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1644 QualType ASTTy = VD->getType();
1645
1646 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1647 const Expr *Init = VD->getAnyInitializer();
1648 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1649 // Generate function that re-emits the declaration's initializer into the
1650 // threadprivate copy of the variable VD
1651 CodeGenFunction CtorCGF(CGM);
1652 FunctionArgList Args;
1653 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1654 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1655 ImplicitParamKind::Other);
1656 Args.push_back(&Dst);
1657
1658 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1659 CGM.getContext().VoidPtrTy, Args);
1660 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1661 std::string Name = getName({"__kmpc_global_ctor_", ""});
1662 llvm::Function *Fn =
1663 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1664 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1665 Args, Loc, Loc);
1666 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1667 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1668 CGM.getContext().VoidPtrTy, Dst.getLocation());
1669 Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
1670 VDAddr.getAlignment());
1671 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1672 /*IsInitializer=*/true);
1673 ArgVal = CtorCGF.EmitLoadOfScalar(
1674 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1675 CGM.getContext().VoidPtrTy, Dst.getLocation());
1676 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1677 CtorCGF.FinishFunction();
1678 Ctor = Fn;
1679 }
1680 if (VD->getType().isDestructedType() != QualType::DK_none) {
1681 // Generate function that emits destructor call for the threadprivate copy
1682 // of the variable VD
1683 CodeGenFunction DtorCGF(CGM);
1684 FunctionArgList Args;
1685 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1686 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1687 ImplicitParamKind::Other);
1688 Args.push_back(&Dst);
1689
1690 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1691 CGM.getContext().VoidTy, Args);
1692 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1693 std::string Name = getName({"__kmpc_global_dtor_", ""});
1694 llvm::Function *Fn =
1695 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1696 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1697 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1698 Loc, Loc);
1699 // Create a scope with an artificial location for the body of this function.
1700 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1701 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1702 DtorCGF.GetAddrOfLocalVar(&Dst),
1703 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1704 DtorCGF.emitDestroy(
1705 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1706 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1707 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1708 DtorCGF.FinishFunction();
1709 Dtor = Fn;
1710 }
1711 // Do not emit init function if it is not required.
1712 if (!Ctor && !Dtor)
1713 return nullptr;
1714
1715 // Copying constructor for the threadprivate variable.
1716 // Must be NULL: the parameter is reserved by the runtime, which currently
1717 // requires it to always be NULL and asserts otherwise.
1718 CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1719 if (Ctor == nullptr) {
1720 Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1721 }
1722 if (Dtor == nullptr) {
1723 Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
1724 }
1725 if (!CGF) {
1726 auto *InitFunctionTy =
1727 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1728 std::string Name = getName({"__omp_threadprivate_init_", ""});
1729 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1730 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1731 CodeGenFunction InitCGF(CGM);
1732 FunctionArgList ArgList;
1733 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1734 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1735 Loc, Loc);
1736 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1737 InitCGF.FinishFunction();
1738 return InitFunction;
1739 }
1740 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1741 }
1742 return nullptr;
1743}
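// For illustration (schematic): for C++ `int x = f();` under
// `#pragma omp threadprivate(x)` this emits, roughly, a ctor helper and an
// init function (names shown schematically):
//   void *__kmpc_global_ctor_(void *p) { *(int *)p = f(); return p; }
//   void __omp_threadprivate_init_() {
//     __kmpc_global_thread_num(&loc);
//     __kmpc_threadprivate_register(&loc, &x, ctor, /*cctor=*/0, dtor);
//   }
// with the init function run from the global initializers when no CGF is given.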
1744
1745 void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
1746 llvm::GlobalValue *GV) {
1747 std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
1748 OMPDeclareTargetDeclAttr::getActiveAttr(FD);
1749
1750 // We only need to handle active 'indirect' declare target functions.
1751 if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
1752 return;
1753
1754 // Get a mangled name to store the new device global in.
1755 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
1756 CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
1757 SmallString<128> Name;
1758 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);
1759
1760 // We need to generate a new global to hold the address of the indirectly
1761 // called device function. Doing this allows us to keep the visibility and
1762 // linkage of the associated function unchanged while allowing the runtime to
1763 // access its value.
1764 llvm::GlobalValue *Addr = GV;
1765 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
1766 Addr = new llvm::GlobalVariable(
1767 CGM.getModule(), CGM.VoidPtrTy,
1768 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
1769 nullptr, llvm::GlobalValue::NotThreadLocal,
1770 CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
1771 Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1772 }
1773
1774 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
1775 Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
1776 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
1777 llvm::GlobalValue::WeakODRLinkage);
1778}
1779
1780 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1781 QualType VarType,
1782 StringRef Name) {
1783 std::string Suffix = getName({"artificial", ""});
1784 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1785 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1786 VarLVType, Twine(Name).concat(Suffix).str());
1787 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1788 CGM.getTarget().isTLSSupported()) {
1789 GAddr->setThreadLocal(/*Val=*/true);
1790 return Address(GAddr, GAddr->getValueType(),
1791 CGM.getContext().getTypeAlignInChars(VarType));
1792 }
1793 std::string CacheSuffix = getName({"cache", ""});
1794 llvm::Value *Args[] = {
1795 emitUpdateLocation(CGF, SourceLocation()),
1796 getThreadID(CGF, SourceLocation()),
1797 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1798 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1799 /*isSigned=*/false),
1800 OMPBuilder.getOrCreateInternalVariable(
1801 CGM.VoidPtrPtrTy,
1802 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1803 return Address(
1804 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1805 CGF.EmitRuntimeCall(
1806 OMPBuilder.getOrCreateRuntimeFunction(
1807 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1808 Args),
1809 CGF.Builder.getPtrTy(0)),
1810 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1811}
1812
1813 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1814 const RegionCodeGenTy &ThenGen,
1815 const RegionCodeGenTy &ElseGen) {
1816 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1817
1818 // If the condition constant folds and can be elided, try to avoid emitting
1819 // the condition and the dead arm of the if/else.
1820 bool CondConstant;
1821 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
1822 if (CondConstant)
1823 ThenGen(CGF);
1824 else
1825 ElseGen(CGF);
1826 return;
1827 }
1828
1829 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1830 // emit the conditional branch.
1831 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
1832 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
1833 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
1834 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
1835
1836 // Emit the 'then' code.
1837 CGF.EmitBlock(ThenBlock);
1838 ThenGen(CGF);
1839 CGF.EmitBranch(ContBlock);
1840 // Emit the 'else' code if present.
1841 // There is no need to emit line number for unconditional branch.
1842 (void)ApplyDebugLocation::CreateEmpty(CGF);
1843 CGF.EmitBlock(ElseBlock);
1844 ElseGen(CGF);
1845 // There is no need to emit line number for unconditional branch.
1846 (void)ApplyDebugLocation::CreateEmpty(CGF);
1847 CGF.EmitBranch(ContBlock);
1848 // Emit the continuation block for code after the if.
1849 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
1850}
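// For illustration (schematic): with a non-constant condition, emitIfClause
// produces the usual diamond
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// with ThenGen/ElseGen filling the two arms and both branching to %omp_if.end;
// a constant-foldable condition emits only the surviving arm.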
1851
1852 void CGOpenMPRuntime::emitParallelCall(
1853 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
1854 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
1855 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
1856 OpenMPSeverityClauseKind Severity, const Expr *Message) {
1857 if (!CGF.HaveInsertPoint())
1858 return;
1859 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
1860 auto &M = CGM.getModule();
1861 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
1862 this](CodeGenFunction &CGF, PrePostActionTy &) {
1863 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
1864 llvm::Value *Args[] = {
1865 RTLoc,
1866 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
1867 OutlinedFn};
1868 llvm::SmallVector<llvm::Value *, 16> RealArgs;
1869 RealArgs.append(std::begin(Args), std::end(Args));
1870 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
1871
1872 llvm::FunctionCallee RTLFn =
1873 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
1874 CGF.EmitRuntimeCall(RTLFn, RealArgs);
1875 };
1876 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
1877 this](CodeGenFunction &CGF, PrePostActionTy &) {
1878 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
1879 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
1880 // Build calls:
1881 // __kmpc_serialized_parallel(&Loc, GTid);
1882 llvm::Value *Args[] = {RTLoc, ThreadID};
1883 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1884 M, OMPRTL___kmpc_serialized_parallel),
1885 Args);
1886
1887 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
1888 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
1889 RawAddress ZeroAddrBound =
1891 /*Name=*/".bound.zero.addr");
1892 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
1893 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
1894 // ThreadId for serialized parallels is 0.
1895 OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
1896 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
1897 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
1898
1899 // Ensure we do not inline the function. This is trivially true for the ones
1900 // passed to __kmpc_fork_call but the ones called in serialized regions
1901 // could be inlined. This is not perfect, but it is closer to the invariant
1902 // we want, namely, every data environment starts with a new function.
1903 // TODO: We should pass the if condition to the runtime function and do the
1904 // handling there. Much cleaner code.
1905 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
1906 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
1907 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
1908
1909 // __kmpc_end_serialized_parallel(&Loc, GTid);
1910 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
1911 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1912 M, OMPRTL___kmpc_end_serialized_parallel),
1913 EndArgs);
1914 };
1915 if (IfCond) {
1916 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
1917 } else {
1918 RegionCodeGenTy ThenRCG(ThenGen);
1919 ThenRCG(CGF);
1920 }
1921}
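// For illustration (schematic): `#pragma omp parallel if(c)` therefore lowers to
//   if (c) {
//     __kmpc_fork_call(&loc, <n>, outlined, <captured>...);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined(&gtid, &.bound.zero.addr, <captured>...);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }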
1922
1923// If we're inside an (outlined) parallel region, use the region info's
1924// thread-ID variable (it is passed as the first argument of the outlined
1925// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
1926// region but in regular serial code, get the thread ID by calling kmp_int32
1927// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
1928// temporary and return the address of that temporary.
1929 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1930 SourceLocation Loc) {
1931 if (auto *OMPRegionInfo =
1932 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
1933 if (OMPRegionInfo->getThreadIDVariable())
1934 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1935
1936 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1937 QualType Int32Ty =
1938 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1939 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
1940 CGF.EmitStoreOfScalar(ThreadID,
1941 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
1942
1943 return ThreadIDTemp;
1944}
1945
1946llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1947 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1948 std::string Name = getName({Prefix, "var"});
1949 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
1950}
1951
1952namespace {
1953/// Common pre(post)-action for different OpenMP constructs.
1954class CommonActionTy final : public PrePostActionTy {
1955 llvm::FunctionCallee EnterCallee;
1956 ArrayRef<llvm::Value *> EnterArgs;
1957 llvm::FunctionCallee ExitCallee;
1958 ArrayRef<llvm::Value *> ExitArgs;
1959 bool Conditional;
1960 llvm::BasicBlock *ContBlock = nullptr;
1961
1962public:
1963 CommonActionTy(llvm::FunctionCallee EnterCallee,
1964 ArrayRef<llvm::Value *> EnterArgs,
1965 llvm::FunctionCallee ExitCallee,
1966 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
1967 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
1968 ExitArgs(ExitArgs), Conditional(Conditional) {}
1969 void Enter(CodeGenFunction &CGF) override {
1970 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
1971 if (Conditional) {
1972 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
1973 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
1974 ContBlock = CGF.createBasicBlock("omp_if.end");
1975 // Generate the branch (If-stmt)
1976 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
1977 CGF.EmitBlock(ThenBlock);
1978 }
1979 }
1980 void Done(CodeGenFunction &CGF) {
1981 // Emit the rest of blocks/branches
1982 CGF.EmitBranch(ContBlock);
1983 CGF.EmitBlock(ContBlock, true);
1984 }
1985 void Exit(CodeGenFunction &CGF) override {
1986 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
1987 }
1988};
1989} // anonymous namespace
1990
1991 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
1992 StringRef CriticalName,
1993 const RegionCodeGenTy &CriticalOpGen,
1994 SourceLocation Loc, const Expr *Hint) {
1995 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
1996 // CriticalOpGen();
1997 // __kmpc_end_critical(ident_t *, gtid, Lock);
1998 // Prepare arguments and build a call to __kmpc_critical
1999 if (!CGF.HaveInsertPoint())
2000 return;
2001 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2002 getCriticalRegionLock(CriticalName)};
2003 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2004 std::end(Args));
2005 if (Hint) {
2006 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2007 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2008 }
2009 CommonActionTy Action(
2010 OMPBuilder.getOrCreateRuntimeFunction(
2011 CGM.getModule(),
2012 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2013 EnterArgs,
2014 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2015 OMPRTL___kmpc_end_critical),
2016 Args);
2017 CriticalOpGen.setAction(Action);
2018 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2019}
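// For illustration (schematic): `#pragma omp critical (name) hint(h)` becomes
//   __kmpc_critical_with_hint(&loc, gtid, &.gomp_critical_user_name.var, h);
//   <body>
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_name.var);
// with the plain __kmpc_critical entry point used when no hint is present.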
2020
2021 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2022 const RegionCodeGenTy &MasterOpGen,
2023 SourceLocation Loc) {
2024 if (!CGF.HaveInsertPoint())
2025 return;
2026 // if(__kmpc_master(ident_t *, gtid)) {
2027 // MasterOpGen();
2028 // __kmpc_end_master(ident_t *, gtid);
2029 // }
2030 // Prepare arguments and build a call to __kmpc_master
2031 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2032 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2033 CGM.getModule(), OMPRTL___kmpc_master),
2034 Args,
2035 OMPBuilder.getOrCreateRuntimeFunction(
2036 CGM.getModule(), OMPRTL___kmpc_end_master),
2037 Args,
2038 /*Conditional=*/true);
2039 MasterOpGen.setAction(Action);
2040 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2041 Action.Done(CGF);
2042}
2043
2044 void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2045 const RegionCodeGenTy &MaskedOpGen,
2046 SourceLocation Loc, const Expr *Filter) {
2047 if (!CGF.HaveInsertPoint())
2048 return;
2049 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2050 // MaskedOpGen();
2051 // __kmpc_end_masked(ident_t *, gtid);
2052 // }
2053 // Prepare arguments and build a call to __kmpc_masked
2054 llvm::Value *FilterVal = Filter
2055 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2056 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2057 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2058 FilterVal};
2059 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2060 getThreadID(CGF, Loc)};
2061 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2062 CGM.getModule(), OMPRTL___kmpc_masked),
2063 Args,
2064 OMPBuilder.getOrCreateRuntimeFunction(
2065 CGM.getModule(), OMPRTL___kmpc_end_masked),
2066 ArgsEnd,
2067 /*Conditional=*/true);
2068 MaskedOpGen.setAction(Action);
2069 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2070 Action.Done(CGF);
2071}
2072
2073 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2074 SourceLocation Loc) {
2075 if (!CGF.HaveInsertPoint())
2076 return;
2077 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2078 OMPBuilder.createTaskyield(CGF.Builder);
2079 } else {
2080 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2081 llvm::Value *Args[] = {
2082 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2083 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2084 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2085 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2086 Args);
2087 }
2088
2089 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2090 Region->emitUntiedSwitch(CGF);
2091}
2092
2093 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2094 const RegionCodeGenTy &TaskgroupOpGen,
2095 SourceLocation Loc) {
2096 if (!CGF.HaveInsertPoint())
2097 return;
2098 // __kmpc_taskgroup(ident_t *, gtid);
2099 // TaskgroupOpGen();
2100 // __kmpc_end_taskgroup(ident_t *, gtid);
2101 // Prepare arguments and build a call to __kmpc_taskgroup
2102 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2103 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2104 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2105 Args,
2106 OMPBuilder.getOrCreateRuntimeFunction(
2107 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2108 Args);
2109 TaskgroupOpGen.setAction(Action);
2110 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2111}
2112
2113/// Given an array of pointers to variables, project the address of a
2114/// given variable.
2115 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2116 unsigned Index, const VarDecl *Var) {
2117 // Pull out the pointer to the variable.
2118 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2119 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2120
2121 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2122 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
2123}
2124
2125 static llvm::Value *emitCopyprivateCopyFunction(
2126 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2127 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2128 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2129 SourceLocation Loc) {
2130 ASTContext &C = CGM.getContext();
2131 // void copy_func(void *LHSArg, void *RHSArg);
2132 FunctionArgList Args;
2133 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2134 ImplicitParamKind::Other);
2135 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2136 ImplicitParamKind::Other);
2137 Args.push_back(&LHSArg);
2138 Args.push_back(&RHSArg);
2139 const auto &CGFI =
2140 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2141 std::string Name =
2142 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2143 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2144 llvm::GlobalValue::InternalLinkage, Name,
2145 &CGM.getModule());
2146 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2147 Fn->setDoesNotRecurse();
2148 CodeGenFunction CGF(CGM);
2149 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2150 // Dest = (void*[n])(LHSArg);
2151 // Src = (void*[n])(RHSArg);
2152 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2153 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2154 CGF.Builder.getPtrTy(0)),
2155 ArgsElemType, CGF.getPointerAlign());
2156 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2157 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2158 CGF.Builder.getPtrTy(0)),
2159 ArgsElemType, CGF.getPointerAlign());
2160 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2161 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2162 // ...
2163 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2164 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2165 const auto *DestVar =
2166 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2167 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2168
2169 const auto *SrcVar =
2170 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2171 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2172
2173 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2174 QualType Type = VD->getType();
2175 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2176 }
2177 CGF.FinishFunction();
2178 return Fn;
2179}
2180
2181 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2182 const RegionCodeGenTy &SingleOpGen,
2183 SourceLocation Loc,
2184 ArrayRef<const Expr *> CopyprivateVars,
2185 ArrayRef<const Expr *> SrcExprs,
2186 ArrayRef<const Expr *> DstExprs,
2187 ArrayRef<const Expr *> AssignmentOps) {
2188 if (!CGF.HaveInsertPoint())
2189 return;
2190 assert(CopyprivateVars.size() == SrcExprs.size() &&
2191 CopyprivateVars.size() == DstExprs.size() &&
2192 CopyprivateVars.size() == AssignmentOps.size());
2193 ASTContext &C = CGM.getContext();
2194 // int32 did_it = 0;
2195 // if(__kmpc_single(ident_t *, gtid)) {
2196 // SingleOpGen();
2197 // __kmpc_end_single(ident_t *, gtid);
2198 // did_it = 1;
2199 // }
2200 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2201 // <copy_func>, did_it);
2202
2203 Address DidIt = Address::invalid();
2204 if (!CopyprivateVars.empty()) {
2205 // int32 did_it = 0;
2206 QualType KmpInt32Ty =
2207 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2208 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2209 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2210 }
2211 // Prepare arguments and build a call to __kmpc_single
2212 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2213 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2214 CGM.getModule(), OMPRTL___kmpc_single),
2215 Args,
2216 OMPBuilder.getOrCreateRuntimeFunction(
2217 CGM.getModule(), OMPRTL___kmpc_end_single),
2218 Args,
2219 /*Conditional=*/true);
2220 SingleOpGen.setAction(Action);
2221 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2222 if (DidIt.isValid()) {
2223 // did_it = 1;
2224 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2225 }
2226 Action.Done(CGF);
2227 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2228 // <copy_func>, did_it);
2229 if (DidIt.isValid()) {
2230 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2231 QualType CopyprivateArrayTy = C.getConstantArrayType(
2232 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
2233 /*IndexTypeQuals=*/0);
2234 // Create a list of all private variables for copyprivate.
2235 Address CopyprivateList =
2236 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2237 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2238 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2239 CGF.Builder.CreateStore(
2240 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2241 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2242 CGF.VoidPtrTy),
2243 Elem);
2244 }
2245 // Build the function that copies private values from the single region to
2246 // all other threads in the corresponding parallel region.
2247 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2248 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2249 SrcExprs, DstExprs, AssignmentOps, Loc);
2250 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2251 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2252 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2253 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2254 llvm::Value *Args[] = {
2255 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2256 getThreadID(CGF, Loc), // i32 <gtid>
2257 BufSize, // size_t <buf_size>
2258 CL.emitRawPointer(CGF), // void *<copyprivate list>
2259 CpyFn, // void (*) (void *, void *) <copy_func>
2260 DidItVal // i32 did_it
2261 };
2262 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2263 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2264 Args);
2265 }
2266}
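// For illustration (schematic): `single` with `copyprivate(a, b)` collects the
// variable addresses into a void*[2] list and broadcasts them, roughly
//   if (__kmpc_single(&loc, gtid)) {
//     <body>; __kmpc_end_single(&loc, gtid); did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, <buf_size>, list, copy_func, did_it);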
2267
2268 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2269 const RegionCodeGenTy &OrderedOpGen,
2270 SourceLocation Loc, bool IsThreads) {
2271 if (!CGF.HaveInsertPoint())
2272 return;
2273 // __kmpc_ordered(ident_t *, gtid);
2274 // OrderedOpGen();
2275 // __kmpc_end_ordered(ident_t *, gtid);
2276 // Prepare arguments and build a call to __kmpc_ordered
2277 if (IsThreads) {
2278 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2279 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2280 CGM.getModule(), OMPRTL___kmpc_ordered),
2281 Args,
2282 OMPBuilder.getOrCreateRuntimeFunction(
2283 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2284 Args);
2285 OrderedOpGen.setAction(Action);
2286 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2287 return;
2288 }
2289 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2290}
2291
2292 static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2293 unsigned Flags;
2294 if (Kind == OMPD_for)
2295 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2296 else if (Kind == OMPD_sections)
2297 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2298 else if (Kind == OMPD_single)
2299 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2300 else if (Kind == OMPD_barrier)
2301 Flags = OMP_IDENT_BARRIER_EXPL;
2302 else
2303 Flags = OMP_IDENT_BARRIER_IMPL;
2304 return Flags;
2305}
2306
2307 void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2308 CodeGenFunction &CGF, const OMPLoopDirective &S,
2309 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2310 // Check if the loop directive is actually a doacross loop directive. In this
2311 // case, choose a 'static, 1' schedule.
2312 if (llvm::any_of(
2313 S.getClausesOfKind<OMPOrderedClause>(),
2314 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2315 ScheduleKind = OMPC_SCHEDULE_static;
2316 // Chunk size is 1 in this case.
2317 llvm::APInt ChunkSize(32, 1);
2318 ChunkExpr = IntegerLiteral::Create(
2319 CGF.getContext(), ChunkSize,
2320 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2321 SourceLocation());
2322 }
2323}
2324
2325 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2326 OpenMPDirectiveKind Kind, bool EmitChecks,
2327 bool ForceSimpleCall) {
2328 // Check if we should use the OMPBuilder
2329 auto *OMPRegionInfo =
2330 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2331 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2332 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
2333 cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
2334 EmitChecks));
2335 CGF.Builder.restoreIP(AfterIP);
2336 return;
2337 }
2338
2339 if (!CGF.HaveInsertPoint())
2340 return;
2341 // Build call __kmpc_cancel_barrier(loc, thread_id);
2342 // Build call __kmpc_barrier(loc, thread_id);
2343 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2344 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2345 // thread_id);
2346 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2347 getThreadID(CGF, Loc)};
2348 if (OMPRegionInfo) {
2349 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2350 llvm::Value *Result = CGF.EmitRuntimeCall(
2351 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2352 OMPRTL___kmpc_cancel_barrier),
2353 Args);
2354 if (EmitChecks) {
2355 // if (__kmpc_cancel_barrier()) {
2356 // exit from construct;
2357 // }
2358 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2359 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2360 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2361 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2362 CGF.EmitBlock(ExitBB);
2363 // exit from construct;
2364 CodeGenFunction::JumpDest CancelDestination =
2365 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2366 CGF.EmitBranchThroughCleanup(CancelDestination);
2367 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2368 }
2369 return;
2370 }
2371 }
2372 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2373 CGM.getModule(), OMPRTL___kmpc_barrier),
2374 Args);
2375}
2376
2377 void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2378 Expr *ME, bool IsFatal) {
2379 llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
2380 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2381 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2382 // *message)
2383 llvm::Value *Args[] = {
2384 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2385 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2386 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2387 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2388 CGM.getModule(), OMPRTL___kmpc_error),
2389 Args);
2390}
2391
2392/// Map the OpenMP loop schedule to the runtime enumeration.
2393static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2394 bool Chunked, bool Ordered) {
2395 switch (ScheduleKind) {
2396 case OMPC_SCHEDULE_static:
2397 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2398 : (Ordered ? OMP_ord_static : OMP_sch_static);
2399 case OMPC_SCHEDULE_dynamic:
2400 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2401 case OMPC_SCHEDULE_guided:
2402 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2403 case OMPC_SCHEDULE_runtime:
2404 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2405 case OMPC_SCHEDULE_auto:
2406 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2408 assert(!Chunked && "chunk was specified but schedule kind not known");
2409 return Ordered ? OMP_ord_static : OMP_sch_static;
2410 }
2411 llvm_unreachable("Unexpected runtime schedule");
2412}
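// For illustration: schedule(static) with no chunk maps to OMP_sch_static,
// schedule(static, c) to OMP_sch_static_chunked, schedule(dynamic[, c]) to
// OMP_sch_dynamic_chunked, and an ordered clause selects the corresponding
// OMP_ord_* value instead.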
2413
2414/// Map the OpenMP distribute schedule to the runtime enumeration.
2415static OpenMPSchedType
2416 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2417 // only static is allowed for dist_schedule
2418 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2419}
2420
2421 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2422 bool Chunked) const {
2423 OpenMPSchedType Schedule =
2424 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2425 return Schedule == OMP_sch_static;
2426}
2427
2428 bool CGOpenMPRuntime::isStaticNonchunked(
2429 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2430 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2431 return Schedule == OMP_dist_sch_static;
2432}
2433
2434 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2435 bool Chunked) const {
2436 OpenMPSchedType Schedule =
2437 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2438 return Schedule == OMP_sch_static_chunked;
2439}
2440
2441 bool CGOpenMPRuntime::isStaticChunked(
2442 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2443 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2444 return Schedule == OMP_dist_sch_static_chunked;
2445}
2446
2447 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2448 OpenMPSchedType Schedule =
2449 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2450 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2451 return Schedule != OMP_sch_static;
2452}
2453
2454static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2455 OpenMPScheduleClauseModifier M1,
2456 OpenMPScheduleClauseModifier M2) {
2457 int Modifier = 0;
2458 switch (M1) {
2459 case OMPC_SCHEDULE_MODIFIER_monotonic:
2460 Modifier = OMP_sch_modifier_monotonic;
2461 break;
2462 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2463 Modifier = OMP_sch_modifier_nonmonotonic;
2464 break;
2465 case OMPC_SCHEDULE_MODIFIER_simd:
2466 if (Schedule == OMP_sch_static_chunked)
2467 Schedule = OMP_sch_static_balanced_chunked;
2468 break;
2469 case OMPC_SCHEDULE_MODIFIER_last:
2470 case OMPC_SCHEDULE_MODIFIER_unknown:
2471 break;
2472 }
2473 switch (M2) {
2474 case OMPC_SCHEDULE_MODIFIER_monotonic:
2475 Modifier = OMP_sch_modifier_monotonic;
2476 break;
2477 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2478 Modifier = OMP_sch_modifier_nonmonotonic;
2479 break;
2480 case OMPC_SCHEDULE_MODIFIER_simd:
2481 if (Schedule == OMP_sch_static_chunked)
2482 Schedule = OMP_sch_static_balanced_chunked;
2483 break;
2484 case OMPC_SCHEDULE_MODIFIER_last:
2485 case OMPC_SCHEDULE_MODIFIER_unknown:
2486 break;
2487 }
2488 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2489 // If the static schedule kind is specified or if the ordered clause is
2490 // specified, and if the nonmonotonic modifier is not specified, the effect is
2491 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2492 // modifier is specified, the effect is as if the nonmonotonic modifier is
2493 // specified.
2494 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2495 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2496 Schedule == OMP_sch_static_balanced_chunked ||
2497 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2498 Schedule == OMP_dist_sch_static_chunked ||
2499 Schedule == OMP_dist_sch_static))
2500 Modifier = OMP_sch_modifier_nonmonotonic;
2501 }
2502 return Schedule | Modifier;
2503}
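// For illustration: under OpenMP >= 5.0, plain schedule(dynamic) gets
// OMP_sch_modifier_nonmonotonic OR-ed into the schedule word, while
// schedule(monotonic: dynamic) keeps OMP_sch_modifier_monotonic and the
// static/ordered/distribute schedules are left unmodified.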
2504
2505 void CGOpenMPRuntime::emitForDispatchInit(
2506 CodeGenFunction &CGF, SourceLocation Loc,
2507 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2508 bool Ordered, const DispatchRTInput &DispatchValues) {
2509 if (!CGF.HaveInsertPoint())
2510 return;
2511 OpenMPSchedType Schedule = getRuntimeSchedule(
2512 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2513 assert(Ordered ||
2514 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2515 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2516 Schedule != OMP_sch_static_balanced_chunked));
2517 // Call __kmpc_dispatch_init(
2518 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2519 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2520 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2521
2522 // If the Chunk was not specified in the clause - use default value 1.
2523 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2524 : CGF.Builder.getIntN(IVSize, 1);
2525 llvm::Value *Args[] = {
2526 emitUpdateLocation(CGF, Loc),
2527 getThreadID(CGF, Loc),
2528 CGF.Builder.getInt32(addMonoNonMonoModifier(
2529 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2530 DispatchValues.LB, // Lower
2531 DispatchValues.UB, // Upper
2532 CGF.Builder.getIntN(IVSize, 1), // Stride
2533 Chunk // Chunk
2534 };
2535 CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
2536 Args);
2537}
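// For illustration (schematic): a dynamically scheduled loop then runs as
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, /*stride=*/1, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &st))
//     for (i = lo; i <= hi; i += st) <body>;
// where the _4/_8[u] suffix follows IVSize and IVSigned.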
2538
2539 void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2540 SourceLocation Loc) {
2541 if (!CGF.HaveInsertPoint())
2542 return;
2543 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2544 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2545 CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
2546}
2547
2548 static void emitForStaticInitCall(
2549 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2550 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2551 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2552 const CGOpenMPRuntime::StaticRTInput &Values) {
2553 if (!CGF.HaveInsertPoint())
2554 return;
2555
2556 assert(!Values.Ordered);
2557 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2558 Schedule == OMP_sch_static_balanced_chunked ||
2559 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2560 Schedule == OMP_dist_sch_static ||
2561 Schedule == OMP_dist_sch_static_chunked);
2562
2563 // Call __kmpc_for_static_init(
2564 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2565 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2566 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2567 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2568 llvm::Value *Chunk = Values.Chunk;
2569 if (Chunk == nullptr) {
2570 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2571 Schedule == OMP_dist_sch_static) &&
2572 "expected static non-chunked schedule");
2573 // If the Chunk was not specified in the clause - use default value 1.
2574 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2575 } else {
2576 assert((Schedule == OMP_sch_static_chunked ||
2577 Schedule == OMP_sch_static_balanced_chunked ||
2578 Schedule == OMP_ord_static_chunked ||
2579 Schedule == OMP_dist_sch_static_chunked) &&
2580 "expected static chunked schedule");
2581 }
2582 llvm::Value *Args[] = {
2583 UpdateLocation,
2584 ThreadId,
2585 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2586 M2)), // Schedule type
2587 Values.IL.emitRawPointer(CGF), // &isLastIter
2588 Values.LB.emitRawPointer(CGF), // &LB
2589 Values.UB.emitRawPointer(CGF), // &UB
2590 Values.ST.emitRawPointer(CGF), // &Stride
2591 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2592 Chunk // Chunk
2593 };
2594 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2595}
2596
2597 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2598 SourceLocation Loc,
2599 OpenMPDirectiveKind DKind,
2600 const OpenMPScheduleTy &ScheduleKind,
2601 const StaticRTInput &Values) {
2602 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2603 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2604 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2605 "Expected loop-based or sections-based directive.");
2606 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2607 isOpenMPLoopDirective(DKind)
2608 ? OMP_IDENT_WORK_LOOP
2609 : OMP_IDENT_WORK_SECTIONS);
2610 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2611 llvm::FunctionCallee StaticInitFunction =
2612 OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
2613 false);
2614 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2615 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2616 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2617}
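// For illustration (schematic): a statically scheduled loop is bracketed as
//   __kmpc_for_static_init_4(&loc, gtid, sched, &last, &lb, &ub, &st,
//                            /*incr=*/1, chunk);
//   for (i = lb; i <= ub; i += st) <body>;
//   __kmpc_for_static_fini(&loc, gtid);
// with the runtime clamping lb/ub to the current thread's chunk.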
2618
2619 void CGOpenMPRuntime::emitDistributeStaticInit(
2620 CodeGenFunction &CGF, SourceLocation Loc,
2621 OpenMPDistScheduleClauseKind SchedKind,
2622 const CGOpenMPRuntime::StaticRTInput &Values) {
2623 OpenMPSchedType ScheduleNum =
2624 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2625 llvm::Value *UpdatedLocation =
2626 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2627 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2628 llvm::FunctionCallee StaticInitFunction;
2629 bool isGPUDistribute =
2630 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2631 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2632 Values.IVSize, Values.IVSigned, isGPUDistribute);
2633
2634 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2635 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2636 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2637}
2638
2639 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2640 SourceLocation Loc,
2641 OpenMPDirectiveKind DKind) {
2642 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2643 DKind == OMPD_sections) &&
2644 "Expected distribute, for, or sections directive kind");
2645 if (!CGF.HaveInsertPoint())
2646 return;
2647 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2648 llvm::Value *Args[] = {
2649 emitUpdateLocation(CGF, Loc,
2650 isOpenMPDistributeDirective(DKind) ||
2651 (DKind == OMPD_target_teams_loop)
2652 ? OMP_IDENT_WORK_DISTRIBUTE
2653 : isOpenMPLoopDirective(DKind)
2654 ? OMP_IDENT_WORK_LOOP
2655 : OMP_IDENT_WORK_SECTIONS),
2656 getThreadID(CGF, Loc)};
2657 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2658 if (isOpenMPDistributeDirective(DKind) &&
2659 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2660 CGF.EmitRuntimeCall(
2661 OMPBuilder.getOrCreateRuntimeFunction(
2662 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2663 Args);
2664 else
2665 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2666 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2667 Args);
2668}
2669
2670 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2671 SourceLocation Loc,
2672 unsigned IVSize,
2673 bool IVSigned) {
2674 if (!CGF.HaveInsertPoint())
2675 return;
2676 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2677 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2678 CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2679 Args);
2680}
2681
2682 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2683 SourceLocation Loc, unsigned IVSize,
2684 bool IVSigned, Address IL,
2685 Address LB, Address UB,
2686 Address ST) {
2687 // Call __kmpc_dispatch_next(
2688 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2689 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2690 // kmp_int[32|64] *p_stride);
2691 llvm::Value *Args[] = {
2692 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2693 IL.emitRawPointer(CGF), // &isLastIter
2694 LB.emitRawPointer(CGF), // &Lower
2695 UB.emitRawPointer(CGF), // &Upper
2696 ST.emitRawPointer(CGF) // &Stride
2697 };
2698 llvm::Value *Call = CGF.EmitRuntimeCall(
2699 OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
2700 return CGF.EmitScalarConversion(
2701 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2702 CGF.getContext().BoolTy, Loc);
2703}
2704
2705 llvm::Value *CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2706 const Expr *Message) {
2707 if (!Message)
2708 return llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2709 return CGF.EmitScalarExpr(Message);
2710}
2711
2712llvm::Value *
2713 CGOpenMPRuntime::emitMessageClause(CodeGenFunction &CGF,
2714 const OMPMessageClause *MessageClause) {
2715 return emitMessageClause(
2716 CGF, MessageClause ? MessageClause->getMessageString() : nullptr);
2717}
2718
2719llvm::Value *
2720 CGOpenMPRuntime::emitSeverityClause(OpenMPSeverityClauseKind Severity) {
2721 // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
2722 // as if sev-level is fatal."
2723 return llvm::ConstantInt::get(CGM.Int32Ty,
2724 Severity == OMPC_SEVERITY_warning ? 1 : 2);
2725}
2726
2727llvm::Value *
2728 CGOpenMPRuntime::emitSeverityClause(const OMPSeverityClause *SeverityClause) {
2729 return emitSeverityClause(SeverityClause ? SeverityClause->getSeverityKind()
2730 : OMPC_SEVERITY_fatal);
2731}
2732
2733 void CGOpenMPRuntime::emitNumThreadsClause(
2734 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
2735 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
2736 const Expr *Message) {
2737 if (!CGF.HaveInsertPoint())
2738 return;
2739 llvm::SmallVector<llvm::Value *, 4> Args(
2740 {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2741 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
2742 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2743 // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
2744 // message) if strict modifier is used.
2745 RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
2746 if (Modifier == OMPC_NUMTHREADS_strict) {
2747 FnID = OMPRTL___kmpc_push_num_threads_strict;
2748 Args.push_back(emitSeverityClause(Severity));
2749 Args.push_back(emitMessageClause(CGF, Message));
2750 }
2751 CGF.EmitRuntimeCall(
2752 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
2753}
2754
2755 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2756 ProcBindKind ProcBind,
2757 SourceLocation Loc) {
2758 if (!CGF.HaveInsertPoint())
2759 return;
2760 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2761 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2762 llvm::Value *Args[] = {
2763 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2764 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2765 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2766 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2767 Args);
2768}
2769
2770 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2771 SourceLocation Loc, llvm::AtomicOrdering AO) {
2772 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2773 OMPBuilder.createFlush(CGF.Builder);
2774 } else {
2775 if (!CGF.HaveInsertPoint())
2776 return;
2777 // Build call void __kmpc_flush(ident_t *loc)
2778 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2779 CGM.getModule(), OMPRTL___kmpc_flush),
2780 emitUpdateLocation(CGF, Loc));
2781 }
2782}
2783
2784namespace {
2785/// Indexes of fields for type kmp_task_t.
2786enum KmpTaskTFields {
2787 /// List of shared variables.
2788 KmpTaskTShareds,
2789 /// Task routine.
2790 KmpTaskTRoutine,
2791 /// Partition id for the untied tasks.
2792 KmpTaskTPartId,
2793 /// Function with call of destructors for private variables.
2794 Data1,
2795 /// Task priority.
2796 Data2,
2797 /// (Taskloops only) Lower bound.
2798 KmpTaskTLowerBound,
2799 /// (Taskloops only) Upper bound.
2800 KmpTaskTUpperBound,
2801 /// (Taskloops only) Stride.
2802 KmpTaskTStride,
2803 /// (Taskloops only) Is last iteration flag.
2804 KmpTaskTLastIter,
2805 /// (Taskloops only) Reduction data.
2806 KmpTaskTReductions,
2807};
2808} // anonymous namespace
2809
2810 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2811 // If we are in simd mode or there are no entries, we don't need to do
2812 // anything.
2813 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2814 return;
2815
2816 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2817 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2818 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2819 SourceLocation Loc;
2820 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2821 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2822 E = CGM.getContext().getSourceManager().fileinfo_end();
2823 I != E; ++I) {
2824 if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
2825 I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
2826 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2827 I->getFirst(), EntryInfo.Line, 1);
2828 break;
2829 }
2830 }
2831 }
2832 switch (Kind) {
2833 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2834 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2835 DiagnosticsEngine::Error, "Offloading entry for target region in "
2836 "%0 is incorrect: either the "
2837 "address or the ID is invalid.");
2838 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2839 } break;
2840 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2841 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2842 DiagnosticsEngine::Error, "Offloading entry for declare target "
2843 "variable %0 is incorrect: the "
2844 "address is invalid.");
2845 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2846 } break;
2847 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2848 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2850 "Offloading entry for declare target variable is incorrect: the "
2851 "address is invalid.");
2852 CGM.getDiags().Report(DiagID);
2853 } break;
2854 }
2855 };
2856
2857 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2858}
2859
2860 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2861 if (!KmpRoutineEntryPtrTy) {
2862 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2863 ASTContext &C = CGM.getContext();
2864 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2865 FunctionProtoType::ExtProtoInfo EPI;
2866 KmpRoutineEntryPtrQTy = C.getPointerType(
2867 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
2868 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
2869 }
2870}
2871
2872namespace {
2873struct PrivateHelpersTy {
2874 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
2875 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
2876 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
2877 PrivateElemInit(PrivateElemInit) {}
2878 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
2879 const Expr *OriginalRef = nullptr;
2880 const VarDecl *Original = nullptr;
2881 const VarDecl *PrivateCopy = nullptr;
2882 const VarDecl *PrivateElemInit = nullptr;
2883 bool isLocalPrivate() const {
2884 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
2885 }
2886};
2887typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
2888} // anonymous namespace
2889
2890static bool isAllocatableDecl(const VarDecl *VD) {
2891 const VarDecl *CVD = VD->getCanonicalDecl();
2892 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2893 return false;
2894 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2895 // Use the default allocation.
2896 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2897 !AA->getAllocator());
2898}
2899
2900static RecordDecl *
2901 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2902 if (!Privates.empty()) {
2903 ASTContext &C = CGM.getContext();
2904 // Build struct .kmp_privates_t. {
2905 // /* private vars */
2906 // };
2907 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
2908 RD->startDefinition();
2909 for (const auto &Pair : Privates) {
2910 const VarDecl *VD = Pair.second.Original;
2911 QualType Type = VD->getType().getNonReferenceType();
2912 // If the private variable is a local variable with lvalue ref type,
2913 // allocate the pointer instead of the pointee type.
2914 if (Pair.second.isLocalPrivate()) {
2915 if (VD->getType()->isLValueReferenceType())
2916 Type = C.getPointerType(Type);
2917 if (isAllocatableDecl(VD))
2918 Type = C.getPointerType(Type);
2919 }
2920 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
2921 if (VD->hasAttrs()) {
2922 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2923 E(VD->getAttrs().end());
2924 I != E; ++I)
2925 FD->addAttr(*I);
2926 }
2927 }
2928 RD->completeDefinition();
2929 return RD;
2930 }
2931 return nullptr;
2932}
2933
2934static RecordDecl *
2935 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
2936 QualType KmpInt32Ty,
2937 QualType KmpRoutineEntryPointerQTy) {
2938 ASTContext &C = CGM.getContext();
2939 // Build struct kmp_task_t {
2940 // void * shareds;
2941 // kmp_routine_entry_t routine;
2942 // kmp_int32 part_id;
2943 // kmp_cmplrdata_t data1;
2944 // kmp_cmplrdata_t data2;
2945 // For taskloops additional fields:
2946 // kmp_uint64 lb;
2947 // kmp_uint64 ub;
2948 // kmp_int64 st;
2949 // kmp_int32 liter;
2950 // void * reductions;
2951 // };
2952 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
2953 UD->startDefinition();
2954 addFieldToRecordDecl(C, UD, KmpInt32Ty);
2955 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
2956 UD->completeDefinition();
2957 CanQualType KmpCmplrdataTy = C.getCanonicalTagType(UD);
2958 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
2959 RD->startDefinition();
2960 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2961 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
2962 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2963 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2964 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
2965 if (isOpenMPTaskLoopDirective(Kind)) {
2966 QualType KmpUInt64Ty =
2967 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
2968 QualType KmpInt64Ty =
2969 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
2970 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2971 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
2972 addFieldToRecordDecl(C, RD, KmpInt64Ty);
2973 addFieldToRecordDecl(C, RD, KmpInt32Ty);
2974 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
2975 }
2976 RD->completeDefinition();
2977 return RD;
2978}
2979
2980static RecordDecl *
2981 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2982 ArrayRef<PrivateDataTy> Privates) {
2983 ASTContext &C = CGM.getContext();
2984 // Build struct kmp_task_t_with_privates {
2985 // kmp_task_t task_data;
2986 // .kmp_privates_t. privates;
2987 // };
2988 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
2989 RD->startDefinition();
2990 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
2991 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2992 addFieldToRecordDecl(C, RD, C.getCanonicalTagType(PrivateRD));
2993 RD->completeDefinition();
2994 return RD;
2995}
2996
2997/// Emit a proxy function which accepts kmp_task_t as the second
2998/// argument.
2999/// \code
3000/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3001/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3002/// For taskloops:
3003/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3004/// tt->reductions, tt->shareds);
3005/// return 0;
3006/// }
3007/// \endcode
3008static llvm::Function *
3009 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3010 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3011 QualType KmpTaskTWithPrivatesPtrQTy,
3012 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3013 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3014 llvm::Value *TaskPrivatesMap) {
3015 ASTContext &C = CGM.getContext();
3016 FunctionArgList Args;
3017 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3018 ImplicitParamKind::Other);
3019 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3020 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3021 ImplicitParamKind::Other);
3022 Args.push_back(&GtidArg);
3023 Args.push_back(&TaskTypeArg);
3024 const auto &TaskEntryFnInfo =
3025 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3026 llvm::FunctionType *TaskEntryTy =
3027 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3028 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3029 auto *TaskEntry = llvm::Function::Create(
3030 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3031 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3032 TaskEntry->setDoesNotRecurse();
3033 CodeGenFunction CGF(CGM);
3034 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3035 Loc, Loc);
3036
3037 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3038 // tt,
3039 // For taskloops:
3040 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3041 // tt->task_data.shareds);
3042 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3043 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3044 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3045 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3046 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3047 const auto *KmpTaskTWithPrivatesQTyRD =
3048 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3049 LValue Base =
3050 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3051 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3052 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3053 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3054 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3055
3056 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3057 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3058 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3059 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3060 CGF.ConvertTypeForMem(SharedsPtrTy));
3061
3062 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3063 llvm::Value *PrivatesParam;
3064 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3065 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3066 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3067 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3068 } else {
3069 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3070 }
3071
3072 llvm::Value *CommonArgs[] = {
3073 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3074 CGF.Builder
3075 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
3076 CGF.VoidPtrTy, CGF.Int8Ty)
3077 .emitRawPointer(CGF)};
3078 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3079 std::end(CommonArgs));
3080 if (isOpenMPTaskLoopDirective(Kind)) {
3081 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3082 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3083 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3084 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3085 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3086 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3087 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3088 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3089 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3090 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3091 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3092 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3093 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3094 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3095 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3096 CallArgs.push_back(LBParam);
3097 CallArgs.push_back(UBParam);
3098 CallArgs.push_back(StParam);
3099 CallArgs.push_back(LIParam);
3100 CallArgs.push_back(RParam);
3101 }
3102 CallArgs.push_back(SharedsParam);
3103
3104 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3105 CallArgs);
3106 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3107 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3108 CGF.FinishFunction();
3109 return TaskEntry;
3110}
3111
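/// Emit a function that runs the destructors of a task's private data.
/// A sketch of the emitted code (names are illustrative):
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
///   // destroy each field of tt->privates that has a non-trivial destructor
/// }
/// \endcode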
3112 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3113 SourceLocation Loc,
3114 QualType KmpInt32Ty,
3115 QualType KmpTaskTWithPrivatesPtrQTy,
3116 QualType KmpTaskTWithPrivatesQTy) {
3117 ASTContext &C = CGM.getContext();
3118 FunctionArgList Args;
3119 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3120 ImplicitParamKind::Other);
3121 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3122 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3123 ImplicitParamKind::Other);
3124 Args.push_back(&GtidArg);
3125 Args.push_back(&TaskTypeArg);
3126 const auto &DestructorFnInfo =
3127 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3128 llvm::FunctionType *DestructorFnTy =
3129 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3130 std::string Name =
3131 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3132 auto *DestructorFn =
3133 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3134 Name, &CGM.getModule());
3135 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3136 DestructorFnInfo);
3137 DestructorFn->setDoesNotRecurse();
3138 CodeGenFunction CGF(CGM);
3139 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3140 Args, Loc, Loc);
3141
3142 LValue Base = CGF.EmitLoadOfPointerLValue(
3143 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3144 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3145 const auto *KmpTaskTWithPrivatesQTyRD =
3146 KmpTaskTWithPrivatesQTy->castAsRecordDecl();
3147 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3148 Base = CGF.EmitLValueForField(Base, *FI);
3149 for (const auto *Field : FI->getType()->castAsRecordDecl()->fields()) {
3150 if (QualType::DestructionKind DtorKind =
3151 Field->getType().isDestructedType()) {
3152 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3153 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3154 }
3155 }
3156 CGF.FinishFunction();
3157 return DestructorFn;
3158}
3159
3160/// Emit a privates mapping function for correct handling of private and
3161/// firstprivate variables.
3162/// \code
3163/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3164/// **noalias priv1,..., <tyn> **noalias privn) {
3165/// *priv1 = &.privates.priv1;
3166/// ...;
3167/// *privn = &.privates.privn;
3168/// }
3169/// \endcode
3170 static llvm::Value *
3171 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3172 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3173 ArrayRef<PrivateDataTy> Privates) {
3174 ASTContext &C = CGM.getContext();
3175 FunctionArgList Args;
3176 ImplicitParamDecl TaskPrivatesArg(
3177 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3178 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3179 ImplicitParamKind::Other);
3180 Args.push_back(&TaskPrivatesArg);
3181 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3182 unsigned Counter = 1;
3183 for (const Expr *E : Data.PrivateVars) {
3184 Args.push_back(ImplicitParamDecl::Create(
3185 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3186 C.getPointerType(C.getPointerType(E->getType()))
3187 .withConst()
3188 .withRestrict(),
3189 ImplicitParamKind::Other));
3190 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3191 PrivateVarsPos[VD] = Counter;
3192 ++Counter;
3193 }
3194 for (const Expr *E : Data.FirstprivateVars) {
3195 Args.push_back(ImplicitParamDecl::Create(
3196 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3197 C.getPointerType(C.getPointerType(E->getType()))
3198 .withConst()
3199 .withRestrict(),
3200 ImplicitParamKind::Other));
3201 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3202 PrivateVarsPos[VD] = Counter;
3203 ++Counter;
3204 }
3205 for (const Expr *E : Data.LastprivateVars) {
3206 Args.push_back(ImplicitParamDecl::Create(
3207 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3208 C.getPointerType(C.getPointerType(E->getType()))
3209 .withConst()
3210 .withRestrict(),
3211 ImplicitParamKind::Other));
3212 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3213 PrivateVarsPos[VD] = Counter;
3214 ++Counter;
3215 }
3216 for (const VarDecl *VD : Data.PrivateLocals) {
3217 QualType Ty = VD->getType().getNonReferenceType();
3218 if (VD->getType()->isLValueReferenceType())
3219 Ty = C.getPointerType(Ty);
3220 if (isAllocatableDecl(VD))
3221 Ty = C.getPointerType(Ty);
3222 Args.push_back(ImplicitParamDecl::Create(
3223 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3224 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3225 ImplicitParamKind::Other));
3226 PrivateVarsPos[VD] = Counter;
3227 ++Counter;
3228 }
3229 const auto &TaskPrivatesMapFnInfo =
3230 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3231 llvm::FunctionType *TaskPrivatesMapTy =
3232 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3233 std::string Name =
3234 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3235 auto *TaskPrivatesMap = llvm::Function::Create(
3236 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3237 &CGM.getModule());
3238 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3239 TaskPrivatesMapFnInfo);
3240 if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
3241 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3242 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3243 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3244 }
3245 CodeGenFunction CGF(CGM);
3246 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3247 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3248
3249 // *privi = &.privates.privi;
3250 LValue Base = CGF.EmitLoadOfPointerLValue(
3251 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3252 TaskPrivatesArg.getType()->castAs<PointerType>());
3253 const auto *PrivatesQTyRD = PrivatesQTy->castAsRecordDecl();
3254 Counter = 0;
3255 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3256 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3257 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3258 LValue RefLVal =
3259 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3260 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3261 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
3262 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3263 ++Counter;
3264 }
3265 CGF.FinishFunction();
3266 return TaskPrivatesMap;
3267}
3268
3269/// Emit initialization for private variables in task-based directives.
3270 static void emitPrivatesInit(CodeGenFunction &CGF,
3271 const OMPExecutableDirective &D,
3272 Address KmpTaskSharedsPtr, LValue TDBase,
3273 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3274 QualType SharedsTy, QualType SharedsPtrTy,
3275 const OMPTaskDataTy &Data,
3276 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3277 ASTContext &C = CGF.getContext();
3278 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3279 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3280 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3281 ? OMPD_taskloop
3282 : OMPD_task;
3283 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3284 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3285 LValue SrcBase;
3286 bool IsTargetTask =
3287 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3288 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3289 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3290 // PointersArray, SizesArray, and MappersArray. The original variables for
3291 // these arrays are not captured and we get their addresses explicitly.
3292 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3293 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3294 SrcBase = CGF.MakeAddrLValue(
3295 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3296 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3297 CGF.ConvertTypeForMem(SharedsTy)),
3298 SharedsTy);
3299 }
3300 FI = FI->getType()->castAsRecordDecl()->field_begin();
3301 for (const PrivateDataTy &Pair : Privates) {
3302 // Do not initialize private locals.
3303 if (Pair.second.isLocalPrivate()) {
3304 ++FI;
3305 continue;
3306 }
3307 const VarDecl *VD = Pair.second.PrivateCopy;
3308 const Expr *Init = VD->getAnyInitializer();
3309 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3310 !CGF.isTrivialInitializer(Init)))) {
3311 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3312 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3313 const VarDecl *OriginalVD = Pair.second.Original;
3314 // Check if the variable is the target-based BasePointersArray,
3315 // PointersArray, SizesArray, or MappersArray.
3316 LValue SharedRefLValue;
3317 QualType Type = PrivateLValue.getType();
3318 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3319 if (IsTargetTask && !SharedField) {
3320 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3321 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3322 cast<CapturedDecl>(OriginalVD->getDeclContext())
3323 ->getNumParams() == 0 &&
3324 isa<TranslationUnitDecl>(
3325 cast<CapturedDecl>(OriginalVD->getDeclContext())
3326 ->getDeclContext()) &&
3327 "Expected artificial target data variable.");
3328 SharedRefLValue =
3329 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3330 } else if (ForDup) {
3331 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3332 SharedRefLValue = CGF.MakeAddrLValue(
3333 SharedRefLValue.getAddress().withAlignment(
3334 C.getDeclAlign(OriginalVD)),
3335 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3336 SharedRefLValue.getTBAAInfo());
3337 } else if (CGF.LambdaCaptureFields.count(
3338 Pair.second.Original->getCanonicalDecl()) > 0 ||
3339 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3340 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3341 } else {
3342 // Processing for implicitly captured variables.
3343 InlinedOpenMPRegionRAII Region(
3344 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3345 /*HasCancel=*/false, /*NoInheritance=*/true);
3346 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3347 }
3348 if (Type->isArrayType()) {
3349 // Initialize firstprivate array.
3350 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3351 // Perform simple memcpy.
3352 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3353 } else {
3354 // Initialize firstprivate array using element-by-element
3355 // initialization.
3356 CGF.EmitOMPAggregateAssign(
3357 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
3358 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3359 Address SrcElement) {
3360 // Clean up any temporaries needed by the initialization.
3361 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3362 InitScope.addPrivate(Elem, SrcElement);
3363 (void)InitScope.Privatize();
3364 // Emit initialization for single element.
3365 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3366 CGF, &CapturesInfo);
3367 CGF.EmitAnyExprToMem(Init, DestElement,
3368 Init->getType().getQualifiers(),
3369 /*IsInitializer=*/false);
3370 });
3371 }
3372 } else {
3373 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3374 InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
3375 (void)InitScope.Privatize();
3376 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3377 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3378 /*capturedByInit=*/false);
3379 }
3380 } else {
3381 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3382 }
3383 }
3384 ++FI;
3385 }
3386}
3387
3388 /// Check if any private copy requires non-trivial initialization (and hence whether a task duplication function is required for taskloops).
3389 static bool checkInitIsRequired(CodeGenFunction &CGF,
3390 ArrayRef<PrivateDataTy> Privates) {
3391 bool InitRequired = false;
3392 for (const PrivateDataTy &Pair : Privates) {
3393 if (Pair.second.isLocalPrivate())
3394 continue;
3395 const VarDecl *VD = Pair.second.PrivateCopy;
3396 const Expr *Init = VD->getAnyInitializer();
3397 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3398 !CGF.isTrivialInitializer(Init));
3399 if (InitRequired)
3400 break;
3401 }
3402 return InitRequired;
3403}
3404
3405
3406/// Emit task_dup function (for initialization of
3407/// private/firstprivate/lastprivate vars and last_iter flag)
3408/// \code
3409/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3410/// lastpriv) {
3411/// // setup lastprivate flag
3412/// task_dst->last = lastpriv;
3413/// // could be constructor calls here...
3414/// }
3415/// \endcode
3416 static llvm::Value *
3417 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3418 const OMPExecutableDirective &D,
3419 QualType KmpTaskTWithPrivatesPtrQTy,
3420 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3421 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3422 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3423 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3424 ASTContext &C = CGM.getContext();
3425 FunctionArgList Args;
3426 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3427 KmpTaskTWithPrivatesPtrQTy,
3428 ImplicitParamKind::Other);
3429 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3430 KmpTaskTWithPrivatesPtrQTy,
3431 ImplicitParamKind::Other);
3432 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3433 ImplicitParamKind::Other);
3434 Args.push_back(&DstArg);
3435 Args.push_back(&SrcArg);
3436 Args.push_back(&LastprivArg);
3437 const auto &TaskDupFnInfo =
3438 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3439 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3440 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3441 auto *TaskDup = llvm::Function::Create(
3442 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3443 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3444 TaskDup->setDoesNotRecurse();
3445 CodeGenFunction CGF(CGM);
3446 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3447 Loc);
3448
3449 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3450 CGF.GetAddrOfLocalVar(&DstArg),
3451 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3452 // task_dst->liter = lastpriv;
3453 if (WithLastIter) {
3454 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3455 LValue Base = CGF.EmitLValueForField(
3456 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3457 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3458 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3459 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3460 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3461 }
3462
3463 // Emit initial values for private copies (if any).
3464 assert(!Privates.empty());
3465 Address KmpTaskSharedsPtr = Address::invalid();
3466 if (!Data.FirstprivateVars.empty()) {
3467 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3468 CGF.GetAddrOfLocalVar(&SrcArg),
3469 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3470 LValue Base = CGF.EmitLValueForField(
3471 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3472 KmpTaskSharedsPtr = Address(
3473 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3474 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3475 KmpTaskTShareds)),
3476 Loc),
3477 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3478 }
3479 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3480 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3481 CGF.FinishFunction();
3482 return TaskDup;
3483}
3484
3485/// Checks if destructor function is required to be generated.
3486/// \return true if cleanups are required, false otherwise.
3487static bool
3488checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3489 ArrayRef<PrivateDataTy> Privates) {
3490 for (const PrivateDataTy &P : Privates) {
3491 if (P.second.isLocalPrivate())
3492 continue;
3493 QualType Ty = P.second.Original->getType().getNonReferenceType();
3494 if (Ty.isDestructedType())
3495 return true;
3496 }
3497 return false;
3498}
3499
3500namespace {
3501/// Loop generator for OpenMP iterator expression.
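/// For each iterator in 'iterator(i = begin:end:step)' the scope emits,
/// schematically (a sketch; the block names match the code below):
/// \code
/// counter = 0;
/// iter.cont:
///   if (counter < number-of-iterations) goto iter.body; else goto iter.exit;
/// iter.body:
///   i = begin + counter * step;
///   <code emitted while the scope is alive>
///   counter = counter + 1; // emitted by the destructor
///   goto iter.cont;
/// iter.exit:
/// \endcode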
3502 class OMPIteratorGeneratorScope final
3503 : public CodeGenFunction::OMPPrivateScope {
3504 CodeGenFunction &CGF;
3505 const OMPIteratorExpr *E = nullptr;
3506 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3507 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3508 OMPIteratorGeneratorScope() = delete;
3509 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3510
3511public:
3512 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3513 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3514 if (!E)
3515 return;
3516 SmallVector<llvm::Value *, 4> Uppers;
3517 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3518 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3519 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3520 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3521 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3522 addPrivate(
3523 HelperData.CounterVD,
3524 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3525 }
3526 Privatize();
3527
3528 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3529 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3530 LValue CLVal =
3531 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3532 HelperData.CounterVD->getType());
3533 // Counter = 0;
3534 CGF.EmitStoreOfScalar(
3535 llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
3536 CLVal);
3537 CodeGenFunction::JumpDest &ContDest =
3538 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3539 CodeGenFunction::JumpDest &ExitDest =
3540 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3541 // N = <number-of-iterations>;
3542 llvm::Value *N = Uppers[I];
3543 // cont:
3544 // if (Counter < N) goto body; else goto exit;
3545 CGF.EmitBlock(ContDest.getBlock());
3546 auto *CVal =
3547 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3548 llvm::Value *Cmp =
3549 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3550 ? CGF.Builder.CreateICmpSLT(CVal, N)
3551 : CGF.Builder.CreateICmpULT(CVal, N);
3552 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3553 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3554 // body:
3555 CGF.EmitBlock(BodyBB);
3556 // Iteri = Begini + Counter * Stepi;
3557 CGF.EmitIgnoredExpr(HelperData.Update);
3558 }
3559 }
3560 ~OMPIteratorGeneratorScope() {
3561 if (!E)
3562 return;
3563 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3564 // Counter = Counter + 1;
3565 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3566 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3567 // goto cont;
3568 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3569 // exit:
3570 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3571 }
3572 }
3573};
3574} // namespace
3575
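/// Compute the base pointer and the size in bytes of the expression \p E.
/// Illustrative cases (a sketch, not the exact emitted IR):
/// \code
/// ([n][m])p -> (p, n * m * sizeof(*p))   // OMPArrayShapingExpr
/// a[lb:len] -> (&a[lb], bytes from &a[lb] to one past the section's last element)
/// x         -> (&x, sizeof(x))           // any other lvalue
/// \endcode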
3576 static std::pair<llvm::Value *, llvm::Value *>
3577 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3578 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3579 llvm::Value *Addr;
3580 if (OASE) {
3581 const Expr *Base = OASE->getBase();
3582 Addr = CGF.EmitScalarExpr(Base);
3583 } else {
3584 Addr = CGF.EmitLValue(E).getPointer(CGF);
3585 }
3586 llvm::Value *SizeVal;
3587 QualType Ty = E->getType();
3588 if (OASE) {
3589 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3590 for (const Expr *SE : OASE->getDimensions()) {
3591 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3592 Sz = CGF.EmitScalarConversion(
3593 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3594 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3595 }
3596 } else if (const auto *ASE =
3597 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
3598 LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
3599 Address UpAddrAddress = UpAddrLVal.getAddress();
3600 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3601 UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
3602 /*Idx0=*/1);
3603 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3604 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3605 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3606 } else {
3607 SizeVal = CGF.getTypeSize(Ty);
3608 }
3609 return std::make_pair(Addr, SizeVal);
3610}
3611
3612 /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
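/// The record mirrors the runtime's kmp_task_affinity_info_t; schematically:
/// \code
/// struct kmp_task_affinity_info_t {
///   intptr_t base_addr;
///   size_t   len;
///   <32-bit unsigned> flags; // FlagsTy in the body below
/// };
/// \endcode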
3613static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3614 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3615 if (KmpTaskAffinityInfoTy.isNull()) {
3616 RecordDecl *KmpAffinityInfoRD =
3617 C.buildImplicitRecord("kmp_task_affinity_info_t");
3618 KmpAffinityInfoRD->startDefinition();
3619 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3620 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3621 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3622 KmpAffinityInfoRD->completeDefinition();
3623 KmpTaskAffinityInfoTy = C.getCanonicalTagType(KmpAffinityInfoRD);
3624 }
3625}
3626
3627 CGOpenMPRuntime::TaskResultTy
3628 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3629 const OMPExecutableDirective &D,
3630 llvm::Function *TaskFunction, QualType SharedsTy,
3631 Address Shareds, const OMPTaskDataTy &Data) {
3632 ASTContext &C = CGM.getContext();
3633 llvm::SmallVector<PrivateDataTy, 4> Privates;
3634 // Aggregate privates and sort them by alignment.
3635 const auto *I = Data.PrivateCopies.begin();
3636 for (const Expr *E : Data.PrivateVars) {
3637 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3638 Privates.emplace_back(
3639 C.getDeclAlign(VD),
3640 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3641 /*PrivateElemInit=*/nullptr));
3642 ++I;
3643 }
3644 I = Data.FirstprivateCopies.begin();
3645 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3646 for (const Expr *E : Data.FirstprivateVars) {
3647 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3648 Privates.emplace_back(
3649 C.getDeclAlign(VD),
3650 PrivateHelpersTy(
3651 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3652 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3653 ++I;
3654 ++IElemInitRef;
3655 }
3656 I = Data.LastprivateCopies.begin();
3657 for (const Expr *E : Data.LastprivateVars) {
3658 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3659 Privates.emplace_back(
3660 C.getDeclAlign(VD),
3661 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3662 /*PrivateElemInit=*/nullptr));
3663 ++I;
3664 }
3665 for (const VarDecl *VD : Data.PrivateLocals) {
3666 if (isAllocatableDecl(VD))
3667 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3668 else
3669 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3670 }
3671 llvm::stable_sort(Privates,
3672 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3673 return L.first > R.first;
3674 });
3675 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3676 // Build type kmp_routine_entry_t (if not built yet).
3677 emitKmpRoutineEntryT(KmpInt32Ty);
3678 // Build type kmp_task_t (if not built yet).
3679 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3680 if (SavedKmpTaskloopTQTy.isNull()) {
3681 SavedKmpTaskloopTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3682 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3683 }
3684 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3685 } else {
3686 assert((D.getDirectiveKind() == OMPD_task ||
3687 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3688 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3689 "Expected taskloop, task or target directive");
3690 if (SavedKmpTaskTQTy.isNull()) {
3691 SavedKmpTaskTQTy = C.getCanonicalTagType(createKmpTaskTRecordDecl(
3692 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3693 }
3694 KmpTaskTQTy = SavedKmpTaskTQTy;
3695 }
3696 const auto *KmpTaskTQTyRD = KmpTaskTQTy->castAsRecordDecl();
3697 // Build particular struct kmp_task_t for the given task.
3698 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3699 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3700 CanQualType KmpTaskTWithPrivatesQTy =
3701 C.getCanonicalTagType(KmpTaskTWithPrivatesQTyRD);
3702 QualType KmpTaskTWithPrivatesPtrQTy =
3703 C.getPointerType(KmpTaskTWithPrivatesQTy);
3704 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
3705 llvm::Value *KmpTaskTWithPrivatesTySize =
3706 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3707 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3708
3709 // Emit initial values for private copies (if any).
3710 llvm::Value *TaskPrivatesMap = nullptr;
3711 llvm::Type *TaskPrivatesMapTy =
3712 std::next(TaskFunction->arg_begin(), 3)->getType();
3713 if (!Privates.empty()) {
3714 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3715 TaskPrivatesMap =
3716 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3717 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3718 TaskPrivatesMap, TaskPrivatesMapTy);
3719 } else {
3720 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3721 cast<llvm::PointerType>(TaskPrivatesMapTy));
3722 }
3723 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3724 // kmp_task_t *tt);
3725 llvm::Function *TaskEntry = emitProxyTaskFunction(
3726 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3727 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3728 TaskPrivatesMap);
3729
3730 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3731 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3732 // kmp_routine_entry_t *task_entry);
3733 // Task flags. Format is taken from
3734 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3735 // description of kmp_tasking_flags struct.
3736 enum {
3737 TiedFlag = 0x1,
3738 FinalFlag = 0x2,
3739 DestructorsFlag = 0x8,
3740 PriorityFlag = 0x20,
3741 DetachableFlag = 0x40,
3742 };
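// E.g. a tied task with a priority clause and privates that need destructors
// ends up with TiedFlag | DestructorsFlag | PriorityFlag == 0x29 (a worked
// example; the actual value is assembled below).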
3743 unsigned Flags = Data.Tied ? TiedFlag : 0;
3744 bool NeedsCleanup = false;
3745 if (!Privates.empty()) {
3746 NeedsCleanup =
3747 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3748 if (NeedsCleanup)
3749 Flags = Flags | DestructorsFlag;
3750 }
3751 if (Data.Priority.getInt())
3752 Flags = Flags | PriorityFlag;
3753 if (D.hasClausesOfKind<OMPDetachClause>())
3754 Flags = Flags | DetachableFlag;
3755 llvm::Value *TaskFlags =
3756 Data.Final.getPointer()
3757 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3758 CGF.Builder.getInt32(FinalFlag),
3759 CGF.Builder.getInt32(/*C=*/0))
3760 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3761 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3762 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3763 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3764 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3765 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3766 TaskEntry, KmpRoutineEntryPtrTy)};
3767 llvm::Value *NewTask;
3768 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3769 // Check if we have any device clause associated with the directive.
3770 const Expr *Device = nullptr;
3771 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3772 Device = C->getDevice();
3773 // Emit the device ID if any; otherwise use the default value.
3774 llvm::Value *DeviceID;
3775 if (Device)
3776 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3777 CGF.Int64Ty, /*isSigned=*/true);
3778 else
3779 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3780 AllocArgs.push_back(DeviceID);
3781 NewTask = CGF.EmitRuntimeCall(
3782 OMPBuilder.getOrCreateRuntimeFunction(
3783 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3784 AllocArgs);
3785 } else {
3786 NewTask =
3787 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3788 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3789 AllocArgs);
3790 }
3791 // Emit detach clause initialization.
3792 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3793 // task_descriptor);
3794 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3795 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3796 LValue EvtLVal = CGF.EmitLValue(Evt);
3797
3798 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3799 // int gtid, kmp_task_t *task);
3800 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3801 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3802 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3803 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3804 OMPBuilder.getOrCreateRuntimeFunction(
3805 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3806 {Loc, Tid, NewTask});
3807 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3808 Evt->getExprLoc());
3809 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3810 }
3811 // Process affinity clauses.
3812 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3813 // Process list of affinity data.
3814 ASTContext &C = CGM.getContext();
3815 Address AffinitiesArray = Address::invalid();
3816 // Calculate number of elements to form the array of affinity data.
3817 llvm::Value *NumOfElements = nullptr;
3818 unsigned NumAffinities = 0;
3819 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3820 if (const Expr *Modifier = C->getModifier()) {
3821 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3822 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3823 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3824 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3825 NumOfElements =
3826 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3827 }
3828 } else {
3829 NumAffinities += C->varlist_size();
3830 }
3831 }
3832 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
3833 // Field ids in the kmp_task_affinity_info record.
3834 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3835
3836 QualType KmpTaskAffinityInfoArrayTy;
3837 if (NumOfElements) {
3838 NumOfElements = CGF.Builder.CreateNUWAdd(
3839 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3840 auto *OVE = new (C) OpaqueValueExpr(
3841 Loc,
3842 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3843 VK_PRValue);
3844 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3845 RValue::get(NumOfElements));
3846 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3847 KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
3848 /*IndexTypeQuals=*/0);
3849 // Properly emit variable-sized array.
3850 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
3851 ImplicitParamKind::Other);
3852 CGF.EmitVarDecl(*PD);
3853 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
3854 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
3855 /*isSigned=*/false);
3856 } else {
3857 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3858 KmpTaskAffinityInfoTy,
3859 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
3860 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3861 AffinitiesArray =
3862 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
3863 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
3864 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
3865 /*isSigned=*/false);
3866 }
3867
3868 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3869 // Fill the array with the elements that have no iterator modifier.
3870 unsigned Pos = 0;
3871 bool HasIterator = false;
3872 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3873 if (C->getModifier()) {
3874 HasIterator = true;
3875 continue;
3876 }
3877 for (const Expr *E : C->varlist()) {
3878 llvm::Value *Addr;
3879 llvm::Value *Size;
3880 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3881 LValue Base =
3882 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
3883 KmpTaskAffinityInfoTy);
3884 // affs[i].base_addr = &<Affinities[i].second>;
3885 LValue BaseAddrLVal = CGF.EmitLValueForField(
3886 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3887 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3888 BaseAddrLVal);
3889 // affs[i].len = sizeof(<Affinities[i].second>);
3890 LValue LenLVal = CGF.EmitLValueForField(
3891 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3892 CGF.EmitStoreOfScalar(Size, LenLVal);
3893 ++Pos;
3894 }
3895 }
3896 LValue PosLVal;
3897 if (HasIterator) {
3898 PosLVal = CGF.MakeAddrLValue(
3899 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
3900 C.getSizeType());
3901 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
3902 }
3903 // Process elements with iterators.
3904 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3905 const Expr *Modifier = C->getModifier();
3906 if (!Modifier)
3907 continue;
3908 OMPIteratorGeneratorScope IteratorScope(
3909 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
3910 for (const Expr *E : C->varlist()) {
3911 llvm::Value *Addr;
3912 llvm::Value *Size;
3913 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
3914 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
3915 LValue Base =
3916 CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
3917 KmpTaskAffinityInfoTy);
3918 // affs[i].base_addr = &<Affinities[i].second>;
3919 LValue BaseAddrLVal = CGF.EmitLValueForField(
3920 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
3921 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
3922 BaseAddrLVal);
3923 // affs[i].len = sizeof(<Affinities[i].second>);
3924 LValue LenLVal = CGF.EmitLValueForField(
3925 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
3926 CGF.EmitStoreOfScalar(Size, LenLVal);
3927 Idx = CGF.Builder.CreateNUWAdd(
3928 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
3929 CGF.EmitStoreOfScalar(Idx, PosLVal);
3930 }
3931 }
3932 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3933 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3934 // naffins, kmp_task_affinity_info_t *affin_list);
3935 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3936 llvm::Value *GTid = getThreadID(CGF, Loc);
3937 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3938 AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
3939 // FIXME: Emit the function and ignore its result for now unless the
3940 // runtime function is properly implemented.
3941 (void)CGF.EmitRuntimeCall(
3942 OMPBuilder.getOrCreateRuntimeFunction(
3943 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
3944 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3945 }
3946 llvm::Value *NewTaskNewTaskTTy =
3947 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3948 NewTask, KmpTaskTWithPrivatesPtrTy);
3949 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
3950 KmpTaskTWithPrivatesQTy);
3951 LValue TDBase =
3952 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
3953 // Fill the data in the resulting kmp_task_t record.
3954 // Copy shareds if there are any.
3955 Address KmpTaskSharedsPtr = Address::invalid();
3956 if (!SharedsTy->castAsRecordDecl()->field_empty()) {
3957 KmpTaskSharedsPtr = Address(
3958 CGF.EmitLoadOfScalar(
3959 CGF.EmitLValueForField(
3960 TDBase,
3961 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
3962 Loc),
3963 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3964 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
3965 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
3966 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
3967 }
3968 // Emit initial values for private copies (if any).
3969 TaskResultTy Result;
3970 if (!Privates.empty()) {
3971 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
3972 SharedsTy, SharedsPtrTy, Data, Privates,
3973 /*ForDup=*/false);
3974 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
3975 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3976 Result.TaskDupFn = emitTaskDupFunction(
3977 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3978 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3979 /*WithLastIter=*/!Data.LastprivateVars.empty());
3980 }
3981 }
3982 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3983 enum { Priority = 0, Destructors = 1 };
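// Schematically (per the runtime's kmp.h):
//   union kmp_cmplrdata { kmp_int32 priority; kmp_routine_entry_t destructors; };
// data1 receives the destructors entry and data2 the priority (see the
// stores below).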
3984 // Provide pointer to function with destructors for privates.
3985 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
3986 const auto *KmpCmplrdataUD = (*FI)->getType()->castAsRecordDecl();
3987 assert(KmpCmplrdataUD->isUnion());
3988 if (NeedsCleanup) {
3989 llvm::Value *DestructorFn = emitDestructorsFunction(
3990 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3991 KmpTaskTWithPrivatesQTy);
3992 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
3993 LValue DestructorsLV = CGF.EmitLValueForField(
3994 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
3995 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3996 DestructorFn, KmpRoutineEntryPtrTy),
3997 DestructorsLV);
3998 }
3999 // Set priority.
4000 if (Data.Priority.getInt()) {
4001 LValue Data2LV = CGF.EmitLValueForField(
4002 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4003 LValue PriorityLV = CGF.EmitLValueForField(
4004 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4005 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4006 }
4007 Result.NewTask = NewTask;
4008 Result.TaskEntry = TaskEntry;
4009 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4010 Result.TDBase = TDBase;
4011 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4012 return Result;
4013}
4014
4015/// Translates internal dependency kind into the runtime kind.
4016 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4017 RTLDependenceKindTy DepKind;
4018 switch (K) {
4019 case OMPC_DEPEND_in:
4020 DepKind = RTLDependenceKindTy::DepIn;
4021 break;
4022 // Out and InOut dependencies must use the same code.
4023 case OMPC_DEPEND_out:
4024 case OMPC_DEPEND_inout:
4025 DepKind = RTLDependenceKindTy::DepInOut;
4026 break;
4027 case OMPC_DEPEND_mutexinoutset:
4028 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4029 break;
4030 case OMPC_DEPEND_inoutset:
4031 DepKind = RTLDependenceKindTy::DepInOutSet;
4032 break;
4033 case OMPC_DEPEND_outallmemory:
4034 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4035 break;
4036 case OMPC_DEPEND_source:
4037 case OMPC_DEPEND_sink:
4038 case OMPC_DEPEND_depobj:
4039 case OMPC_DEPEND_inoutallmemory:
4040 case OMPC_DEPEND_unknown:
4041 llvm_unreachable("Unknown task dependence type");
4042 }
4043 return DepKind;
4044}
4045
4046/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
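/// The record mirrors the runtime's kmp_depend_info; schematically:
/// \code
/// struct kmp_depend_info {
///   intptr_t base_addr;
///   size_t   len;
///   <bool-width unsigned> flags; // FlagsTy in the body below
/// };
/// \endcode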
4047static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4048 QualType &FlagsTy) {
4049 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4050 if (KmpDependInfoTy.isNull()) {
4051 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4052 KmpDependInfoRD->startDefinition();
4053 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4054 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4055 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4056 KmpDependInfoRD->completeDefinition();
4057 KmpDependInfoTy = C.getCanonicalTagType(KmpDependInfoRD);
4058 }
4059}
4060
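/// Returns the number of dependency records stored in a depobj and an LValue
/// for the start of its payload. The count is kept one record before the
/// payload, i.e. it is read as deps[-1].base_addr (see emitDepobjDependClause
/// below for the layout).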
4061 std::pair<llvm::Value *, LValue>
4062 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4063 SourceLocation Loc) {
4064 ASTContext &C = CGM.getContext();
4065 QualType FlagsTy;
4066 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4067 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4068 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4069 LValue Base = CGF.EmitLoadOfPointerLValue(
4070 DepobjLVal.getAddress().withElementType(
4071 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4072 KmpDependInfoPtrTy->castAs<PointerType>());
4073 Address DepObjAddr = CGF.Builder.CreateGEP(
4074 CGF, Base.getAddress(),
4075 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4076 LValue NumDepsBase = CGF.MakeAddrLValue(
4077 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4078 // NumDeps = deps[-1].base_addr;
4079 LValue BaseAddrLVal = CGF.EmitLValueForField(
4080 NumDepsBase,
4081 *std::next(KmpDependInfoRD->field_begin(),
4082 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4083 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4084 return std::make_pair(NumDeps, Base);
4085}
4086
4087static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4088 llvm::PointerUnion<unsigned *, LValue *> Pos,
4089 const OMPTaskDataTy::DependData &Data,
4090 Address DependenciesArray) {
4091 CodeGenModule &CGM = CGF.CGM;
4092 ASTContext &C = CGM.getContext();
4093 QualType FlagsTy;
4094 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4095 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4096 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4097
4098 OMPIteratorGeneratorScope IteratorScope(
4099 CGF, cast_or_null<OMPIteratorExpr>(
4100 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4101 : nullptr));
4102 for (const Expr *E : Data.DepExprs) {
4103 llvm::Value *Addr;
4104 llvm::Value *Size;
4105
4106 // The expression will be a nullptr in the 'omp_all_memory' case.
4107 if (E) {
4108 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4109 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4110 } else {
4111 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4112 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4113 }
4114 LValue Base;
4115 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4116 Base = CGF.MakeAddrLValue(
4117 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4118 } else {
4119 assert(E && "Expected a non-null expression");
4120 LValue &PosLVal = *cast<LValue *>(Pos);
4121 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4122 Base = CGF.MakeAddrLValue(
4123 CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
4124 }
4125 // deps[i].base_addr = &<Dependencies[i].second>;
4126 LValue BaseAddrLVal = CGF.EmitLValueForField(
4127 Base,
4128 *std::next(KmpDependInfoRD->field_begin(),
4129 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4130 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4131 // deps[i].len = sizeof(<Dependencies[i].second>);
4132 LValue LenLVal = CGF.EmitLValueForField(
4133 Base, *std::next(KmpDependInfoRD->field_begin(),
4134 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4135 CGF.EmitStoreOfScalar(Size, LenLVal);
4136 // deps[i].flags = <Dependencies[i].first>;
4137 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4138 LValue FlagsLVal = CGF.EmitLValueForField(
4139 Base,
4140 *std::next(KmpDependInfoRD->field_begin(),
4141 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4142 CGF.EmitStoreOfScalar(
4143 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4144 FlagsLVal);
4145 if (unsigned *P = dyn_cast<unsigned *>(Pos)) {
4146 ++(*P);
4147 } else {
4148 LValue &PosLVal = *cast<LValue *>(Pos);
4149 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4150 Idx = CGF.Builder.CreateNUWAdd(Idx,
4151 llvm::ConstantInt::get(Idx->getType(), 1));
4152 CGF.EmitStoreOfScalar(Idx, PosLVal);
4153 }
4154 }
4155}
4156
4157 SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4158 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4159 const OMPTaskDataTy::DependData &Data) {
4160 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4161 "Expected depobj dependency kind.");
4163 SmallVector<LValue, 4> SizeLVals;
4164 ASTContext &C = CGF.getContext();
4165 {
4166 OMPIteratorGeneratorScope IteratorScope(
4167 CGF, cast_or_null<OMPIteratorExpr>(
4168 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4169 : nullptr));
4170 for (const Expr *E : Data.DepExprs) {
4171 llvm::Value *NumDeps;
4172 LValue Base;
4173 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4174 std::tie(NumDeps, Base) =
4175 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4176 LValue NumLVal = CGF.MakeAddrLValue(
4177 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4178 C.getUIntPtrType());
4179 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4180 NumLVal.getAddress());
4181 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4182 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4183 CGF.EmitStoreOfScalar(Add, NumLVal);
4184 SizeLVals.push_back(NumLVal);
4185 }
4186 }
4187 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4188 llvm::Value *Size =
4189 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4190 Sizes.push_back(Size);
4191 }
4192 return Sizes;
4193}
4194
4195 void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4196 QualType &KmpDependInfoTy,
4197 LValue PosLVal,
4198 const OMPTaskDataTy::DependData &Data,
4199 Address DependenciesArray) {
4200 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4201 "Expected depobj dependency kind.");
4202 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4203 {
4204 OMPIteratorGeneratorScope IteratorScope(
4205 CGF, cast_or_null<OMPIteratorExpr>(
4206 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4207 : nullptr));
4208 for (const Expr *E : Data.DepExprs) {
4209 llvm::Value *NumDeps;
4210 LValue Base;
4211 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4212 std::tie(NumDeps, Base) =
4213 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4214
4215 // memcpy the dependency data.
4216 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4217 ElSize,
4218 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4219 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4220 Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
4221 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);
4222
4223 // Increase pos.
4224 // pos += size;
4225 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4226 CGF.EmitStoreOfScalar(Add, PosLVal);
4227 }
4228 }
4229}
4230
4231 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4232 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4233 SourceLocation Loc) {
4234 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4235 return D.DepExprs.empty();
4236 }))
4237 return std::make_pair(nullptr, Address::invalid());
4238 // Process list of dependencies.
4239 ASTContext &C = CGM.getContext();
4240 Address DependenciesArray = Address::invalid();
4241 llvm::Value *NumOfElements = nullptr;
4242 unsigned NumDependencies = std::accumulate(
4243 Dependencies.begin(), Dependencies.end(), 0,
4244 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4245 return D.DepKind == OMPC_DEPEND_depobj
4246 ? V
4247 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4248 });
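// E.g. 'depend(in: a, b) depend(iterator(i=0:n), out: c[i]) depend(depobj: d)'
// contributes 2 here; iterator-expanded and depobj dependencies are counted
// separately below.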
4249 QualType FlagsTy;
4250 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4251 bool HasDepobjDeps = false;
4252 bool HasRegularWithIterators = false;
4253 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4254 llvm::Value *NumOfRegularWithIterators =
4255 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4256 // Calculate number of depobj dependencies and regular deps with the
4257 // iterators.
4258 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4259 if (D.DepKind == OMPC_DEPEND_depobj) {
4260 SmallVector<llvm::Value *, 4> Sizes =
4261 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4262 for (llvm::Value *Size : Sizes) {
4263 NumOfDepobjElements =
4264 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4265 }
4266 HasDepobjDeps = true;
4267 continue;
4268 }
4269 // Include number of iterations, if any.
4270
4271 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4272 llvm::Value *ClauseIteratorSpace =
4273 llvm::ConstantInt::get(CGF.IntPtrTy, 1);
4274 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4275 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4276 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4277 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
4278 }
4279 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4280 ClauseIteratorSpace,
4281 llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4282 NumOfRegularWithIterators =
4283 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4284 HasRegularWithIterators = true;
4285 continue;
4286 }
4287 }
4288
4289 QualType KmpDependInfoArrayTy;
4290 if (HasDepobjDeps || HasRegularWithIterators) {
4291 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4292 /*isSigned=*/false);
4293 if (HasDepobjDeps) {
4294 NumOfElements =
4295 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4296 }
4297 if (HasRegularWithIterators) {
4298 NumOfElements =
4299 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4300 }
4301 auto *OVE = new (C) OpaqueValueExpr(
4302 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4303 VK_PRValue);
4304 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4305 RValue::get(NumOfElements));
4306 KmpDependInfoArrayTy =
4307 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4308 /*IndexTypeQuals=*/0);
4309 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4310 // Properly emit variable-sized array.
4311 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4312 ImplicitParamKind::Other);
4313 CGF.EmitVarDecl(*PD);
4314 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4315 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4316 /*isSigned=*/false);
4317 } else {
4318 KmpDependInfoArrayTy = C.getConstantArrayType(
4319 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4320 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4321 DependenciesArray =
4322 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4323 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4324 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4325 /*isSigned=*/false);
4326 }
4327 unsigned Pos = 0;
4328 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4329 if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
4330 continue;
4331 emitDependData(CGF, KmpDependInfoTy, &Pos, Dep, DependenciesArray);
4332 }
4333 // Copy regular dependencies with iterators.
4334 LValue PosLVal = CGF.MakeAddrLValue(
4335 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4336 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4337 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4338 if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
4339 continue;
4340 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dep, DependenciesArray);
4341 }
4342 // Copy final depobj arrays without iterators.
4343 if (HasDepobjDeps) {
4344 for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
4345 if (Dep.DepKind != OMPC_DEPEND_depobj)
4346 continue;
4347 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dep, DependenciesArray);
4348 }
4349 }
4350 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4351 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4352 return std::make_pair(NumOfElements, DependenciesArray);
4353}
4354
4355 Address CGOpenMPRuntime::emitDepobjDependClause(
4356 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4357 SourceLocation Loc) {
4358 if (Dependencies.DepExprs.empty())
4359 return Address::invalid();
4360 // Process list of dependencies.
4361 ASTContext &C = CGM.getContext();
4362 Address DependenciesArray = Address::invalid();
4363 unsigned NumDependencies = Dependencies.DepExprs.size();
4364 QualType FlagsTy;
4365 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4366 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4367
4368 llvm::Value *Size;
4369 // Define type kmp_depend_info[<Dependencies.size()>];
4370 // For depobj reserve one extra element to store the number of elements.
4371 // It is required to handle depobj(x) update(in) construct.
4372 // kmp_depend_info[<Dependencies.size()>] deps;
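// Resulting layout, schematically:
//   deps[0].base_addr = <number of payload records>;
//   deps[1..N]        = the dependency records;
// The address returned below points at deps[1].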
4373 llvm::Value *NumDepsVal;
4374 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4375 if (const auto *IE =
4376 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4377 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4378 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4379 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4380 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4381 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4382 }
4383 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4384 NumDepsVal);
4385 CharUnits SizeInBytes =
4386 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4387 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4388 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4389 NumDepsVal =
4390 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4391 } else {
4392 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4393 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4394 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4395 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4396 Size = CGM.getSize(Sz.alignTo(Align));
4397 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4398 }
4399 // Need to allocate on the dynamic memory.
4400 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4401 // Use default allocator.
4402 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4403 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4404
4405 llvm::Value *Addr =
4406 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4407 CGM.getModule(), OMPRTL___kmpc_alloc),
4408 Args, ".dep.arr.addr");
4409 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4410 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4411 Addr, CGF.Builder.getPtrTy(0));
4412 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4413 // Write number of elements in the first element of array for depobj.
4414 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4415 // deps[i].base_addr = NumDependencies;
4416 LValue BaseAddrLVal = CGF.EmitLValueForField(
4417 Base,
4418 *std::next(KmpDependInfoRD->field_begin(),
4419 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4420 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4421 llvm::PointerUnion<unsigned *, LValue *> Pos;
4422 unsigned Idx = 1;
4423 LValue PosLVal;
4424 if (Dependencies.IteratorExpr) {
4425 PosLVal = CGF.MakeAddrLValue(
4426 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4427 C.getSizeType());
4428 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4429 /*IsInit=*/true);
4430 Pos = &PosLVal;
4431 } else {
4432 Pos = &Idx;
4433 }
4434 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4435 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4436 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4437 CGF.Int8Ty);
4438 return DependenciesArray;
4439}
4440
4441 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4442 SourceLocation Loc) {
4443 ASTContext &C = CGM.getContext();
4444 QualType FlagsTy;
4445 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4446 LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
4447 C.VoidPtrTy.castAs<PointerType>());
4448 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4449 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4450 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4451 CGF.ConvertTypeForMem(KmpDependInfoTy));
4452 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4453 Addr.getElementType(), Addr.emitRawPointer(CGF),
4454 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4455 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4456 CGF.VoidPtrTy);
4457 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4458 // Use default allocator.
4459 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4460 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4461
4462 // __kmpc_free(gtid, addr, nullptr);
4463 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4464 CGM.getModule(), OMPRTL___kmpc_free),
4465 Args);
4466}
4467
4468void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4469 OpenMPDependClauseKind NewDepKind,
4470 SourceLocation Loc) {
4471 ASTContext &C = CGM.getContext();
4472 QualType FlagsTy;
4473 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4474 auto *KmpDependInfoRD = KmpDependInfoTy->castAsRecordDecl();
4475 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4476 llvm::Value *NumDeps;
4477 LValue Base;
4478 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4479
4480 Address Begin = Base.getAddress();
4481 // Cast from pointer to array type to pointer to single element.
4482 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
4483 Begin.emitRawPointer(CGF), NumDeps);
4484 // The basic structure here is a do-while loop (the body runs at least once).
4485 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4486 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4487 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4488 CGF.EmitBlock(BodyBB);
4489 llvm::PHINode *ElementPHI =
4490 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4491 ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
4492 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4493 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4494 Base.getTBAAInfo());
4495 // deps[i].flags = NewDepKind;
4496 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4497 LValue FlagsLVal = CGF.EmitLValueForField(
4498 Base, *std::next(KmpDependInfoRD->field_begin(),
4499 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4500 CGF.EmitStoreOfScalar(
4501 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4502 FlagsLVal);
4503
4504 // Shift the address forward by one element.
4505 llvm::Value *ElementNext =
4506 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
4507 .emitRawPointer(CGF);
4508 ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
4509 llvm::Value *IsEmpty =
4510 CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
4511 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4512 // Done.
4513 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4514}
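// Editorial example (a sketch for illustration, not part of the original
// source): the loop above implements 'depobj ... update', which rewrites the
// flags of every stored dependence record in place:
//
//   #include <omp.h>
//   void change_dep(omp_depend_t &d) {
//     #pragma omp depobj(d) update(mutexinoutset)
//   }
//
// Nothing is reallocated; only deps[i].flags is stored for i in [0, ndeps).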
4515
4516void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4517 const OMPExecutableDirective &D,
4518 llvm::Function *TaskFunction,
4519 QualType SharedsTy, Address Shareds,
4520 const Expr *IfCond,
4521 const OMPTaskDataTy &Data) {
4522 if (!CGF.HaveInsertPoint())
4523 return;
4524
4525 TaskResultTy Result =
4526 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4527 llvm::Value *NewTask = Result.NewTask;
4528 llvm::Function *TaskEntry = Result.TaskEntry;
4529 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4530 LValue TDBase = Result.TDBase;
4531 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4532 // Process list of dependences.
4533 Address DependenciesArray = Address::invalid();
4534 llvm::Value *NumOfElements;
4535 std::tie(NumOfElements, DependenciesArray) =
4536 emitDependClause(CGF, Data.Dependences, Loc);
4537
4538 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4539 // libcall.
4540 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4541 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4542 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4543 // list is not empty
4544 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4545 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4546 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4547 llvm::Value *DepTaskArgs[7];
4548 if (!Data.Dependences.empty()) {
4549 DepTaskArgs[0] = UpLoc;
4550 DepTaskArgs[1] = ThreadID;
4551 DepTaskArgs[2] = NewTask;
4552 DepTaskArgs[3] = NumOfElements;
4553 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4554 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4555 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4556 }
4557 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4558 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4559 if (!Data.Tied) {
4560 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4561 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4562 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4563 }
4564 if (!Data.Dependences.empty()) {
4565 CGF.EmitRuntimeCall(
4566 OMPBuilder.getOrCreateRuntimeFunction(
4567 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4568 DepTaskArgs);
4569 } else {
4570 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4571 CGM.getModule(), OMPRTL___kmpc_omp_task),
4572 TaskArgs);
4573 }
4574 // Check if the parent region is untied and build a return for the untied task.
4575 if (auto *Region =
4576 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4577 Region->emitUntiedSwitch(CGF);
4578 };
4579
4580 llvm::Value *DepWaitTaskArgs[7];
4581 if (!Data.Dependences.empty()) {
4582 DepWaitTaskArgs[0] = UpLoc;
4583 DepWaitTaskArgs[1] = ThreadID;
4584 DepWaitTaskArgs[2] = NumOfElements;
4585 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4586 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4587 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4588 DepWaitTaskArgs[6] =
4589 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4590 }
4591 auto &M = CGM.getModule();
4592 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4593 TaskEntry, &Data, &DepWaitTaskArgs,
4594 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4595 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4596 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4597 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4598 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4599 // is specified.
4600 if (!Data.Dependences.empty())
4601 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4602 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4603 DepWaitTaskArgs);
4604 // Call proxy_task_entry(gtid, new_task);
4605 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4606 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4607 Action.Enter(CGF);
4608 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4609 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4610 OutlinedFnArgs);
4611 };
4612
4613 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4614 // kmp_task_t *new_task);
4615 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4616 // kmp_task_t *new_task);
4617 RegionCodeGenTy RCG(CodeGen);
4618 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4619 M, OMPRTL___kmpc_omp_task_begin_if0),
4620 TaskArgs,
4621 OMPBuilder.getOrCreateRuntimeFunction(
4622 M, OMPRTL___kmpc_omp_task_complete_if0),
4623 TaskArgs);
4624 RCG.setAction(Action);
4625 RCG(CGF);
4626 };
4627
4628 if (IfCond) {
4629 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4630 } else {
4631 RegionCodeGenTy ThenRCG(ThenCodeGen);
4632 ThenRCG(CGF);
4633 }
4634}
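// Editorial example (a sketch for illustration, not part of the original
// source): for a task with dependences and an if clause, e.g.
//
//   void produce(int *x) {
//   #pragma omp task depend(out : x[0]) if (0)
//     x[0] = 42;
//   }
//
// the 'then' branch above emits __kmpc_omp_task_with_deps, while the 'else'
// branch (if clause evaluating to false) first waits on the dependences via
// __kmpc_omp_taskwait_deps_51 and then runs the task body inline, bracketed
// by __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0.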
4635
4636void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4637 const OMPLoopDirective &D,
4638 llvm::Function *TaskFunction,
4639 QualType SharedsTy, Address Shareds,
4640 const Expr *IfCond,
4641 const OMPTaskDataTy &Data) {
4642 if (!CGF.HaveInsertPoint())
4643 return;
4644 TaskResultTy Result =
4645 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4646 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4647 // libcall.
4648 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4649 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4650 // sched, kmp_uint64 grainsize, void *task_dup);
4651 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4652 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4653 llvm::Value *IfVal;
4654 if (IfCond) {
4655 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4656 /*isSigned=*/true);
4657 } else {
4658 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4659 }
4660
4661 LValue LBLVal = CGF.EmitLValueForField(
4662 Result.TDBase,
4663 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4664 const auto *LBVar =
4665 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4666 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4667 /*IsInitializer=*/true);
4668 LValue UBLVal = CGF.EmitLValueForField(
4669 Result.TDBase,
4670 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4671 const auto *UBVar =
4672 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4673 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4674 /*IsInitializer=*/true);
4675 LValue StLVal = CGF.EmitLValueForField(
4676 Result.TDBase,
4677 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4678 const auto *StVar =
4679 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4680 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4681 /*IsInitializer=*/true);
4682 // Store reductions address.
4683 LValue RedLVal = CGF.EmitLValueForField(
4684 Result.TDBase,
4685 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4686 if (Data.Reductions) {
4687 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4688 } else {
4689 CGF.EmitNullInitialization(RedLVal.getAddress(),
4690 CGF.getContext().VoidPtrTy);
4691 }
4692 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4693 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4694 UpLoc,
4695 ThreadID,
4696 Result.NewTask,
4697 IfVal,
4698 LBLVal.getPointer(CGF),
4699 UBLVal.getPointer(CGF),
4700 CGF.EmitLoadOfScalar(StLVal, Loc),
4701 llvm::ConstantInt::getSigned(
4702 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4703 llvm::ConstantInt::getSigned(
4704 CGF.IntTy, Data.Schedule.getPointer()
4705 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4706 : NoSchedule),
4707 Data.Schedule.getPointer()
4708 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4709 /*isSigned=*/false)
4710 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
4711 if (Data.HasModifier)
4712 TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));
4713
4714 TaskArgs.push_back(Result.TaskDupFn
4715 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4716 Result.TaskDupFn, CGF.VoidPtrTy)
4717 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
4718 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4719 CGM.getModule(), Data.HasModifier
4720 ? OMPRTL___kmpc_taskloop_5
4721 : OMPRTL___kmpc_taskloop),
4722 TaskArgs);
4723}
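// Editorial example (a sketch for illustration, not part of the original
// source): a taskloop such as
//
//   void bump(int n, int *a) {
//   #pragma omp taskloop grainsize(64)
//     for (int i = 0; i < n; ++i)
//       a[i] += 1;
//   }
//
// reaches the __kmpc_taskloop call above with sched == Grainsize and a
// grainsize operand of 64; a 'num_tasks(k)' clause would select
// sched == NumTasks instead, and no schedule clause yields NoSchedule.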
4724
4725/// Emit reduction operation for each element of array (required for
4726/// array sections) LHS op = RHS.
4727/// \param Type Type of array.
4728/// \param LHSVar Variable on the left side of the reduction operation
4729/// (references element of array in original variable).
4730/// \param RHSVar Variable on the right side of the reduction operation
4731/// (references element of array in original variable).
4732/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4733/// RHSVar.
4734static void EmitOMPAggregateReduction(
4735 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4736 const VarDecl *RHSVar,
4737 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4738 const Expr *, const Expr *)> &RedOpGen,
4739 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4740 const Expr *UpExpr = nullptr) {
4741 // Perform element-by-element initialization.
4742 QualType ElementTy;
4743 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4744 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4745
4746 // Drill down to the base element type on both arrays.
4747 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4748 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4749
4750 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4751 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4752 // Cast from pointer to array type to pointer to single element.
4753 llvm::Value *LHSEnd =
4754 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4755 // The basic structure here is a while-do loop.
4756 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4757 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4758 llvm::Value *IsEmpty =
4759 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4760 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4761
4762 // Enter the loop body, making that address the current address.
4763 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4764 CGF.EmitBlock(BodyBB);
4765
4766 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4767
4768 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4769 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4770 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4771 Address RHSElementCurrent(
4772 RHSElementPHI, RHSAddr.getElementType(),
4773 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4774
4775 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4776 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4777 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4778 Address LHSElementCurrent(
4779 LHSElementPHI, LHSAddr.getElementType(),
4780 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4781
4782 // Emit copy.
4783 CodeGenFunction::OMPPrivateScope Scope(CGF);
4784 Scope.addPrivate(LHSVar, LHSElementCurrent);
4785 Scope.addPrivate(RHSVar, RHSElementCurrent);
4786 Scope.Privatize();
4787 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4788 Scope.ForceCleanup();
4789
4790 // Shift the address forward by one element.
4791 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4792 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4793 "omp.arraycpy.dest.element");
4794 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4795 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4796 "omp.arraycpy.src.element");
4797 // Check whether we've reached the end.
4798 llvm::Value *Done =
4799 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4800 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4801 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4802 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4803
4804 // Done.
4805 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4806}
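// Editorial example (a sketch for illustration, not part of the original
// source): the element-by-element loop above serves reductions over array
// types and array sections, e.g.
//
//   void vsum(int (&a)[100]) {
//   #pragma omp parallel for reduction(+ : a)
//     for (int i = 0; i < 100; ++i)
//       a[i] += i;
//   }
//
// RedOpGen is invoked once per element with LHSVar/RHSVar privatized to the
// current destination/source elements, advanced by the two PHI nodes.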
4807
4808/// Emit reduction combiner. If the combiner is a simple expression, emit it as
4809/// is; otherwise treat it as the combiner of a user-defined reduction (UDR)
4810/// decl and emit it as a call to the UDR combiner function.
4811static void emitReductionCombiner(CodeGenFunction &CGF,
4812 const Expr *ReductionOp) {
4813 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4814 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4815 if (const auto *DRE =
4816 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4817 if (const auto *DRD =
4818 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4819 std::pair<llvm::Function *, llvm::Function *> Reduction =
4820 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4821 RValue Func = RValue::get(Reduction.first);
4822 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4823 CGF.EmitIgnoredExpr(ReductionOp);
4824 return;
4825 }
4826 CGF.EmitIgnoredExpr(ReductionOp);
4827}
4828
4829llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4830 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4831 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4832 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4833 ASTContext &C = CGM.getContext();
4834
4835 // void reduction_func(void *LHSArg, void *RHSArg);
4836 FunctionArgList Args;
4837 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4838 ImplicitParamKind::Other);
4839 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4840 ImplicitParamKind::Other);
4841 Args.push_back(&LHSArg);
4842 Args.push_back(&RHSArg);
4843 const auto &CGFI =
4844 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4845 std::string Name = getReductionFuncName(ReducerName);
4846 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4847 llvm::GlobalValue::InternalLinkage, Name,
4848 &CGM.getModule());
4849 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
4850 Fn->setDoesNotRecurse();
4851 CodeGenFunction CGF(CGM);
4852 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
4853
4854 // Dst = (void*[n])(LHSArg);
4855 // Src = (void*[n])(RHSArg);
4856 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4857 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4858 CGF.Builder.getPtrTy(0)),
4859 ArgsElemType, CGF.getPointerAlign());
4860 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4861 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4862 CGF.Builder.getPtrTy(0)),
4863 ArgsElemType, CGF.getPointerAlign());
4864
4865 // ...
4866 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4867 // ...
4868 CodeGenFunction::OMPPrivateScope Scope(CGF);
4869 const auto *IPriv = Privates.begin();
4870 unsigned Idx = 0;
4871 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4872 const auto *RHSVar =
4873 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4874 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
4875 const auto *LHSVar =
4876 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4877 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
4878 QualType PrivTy = (*IPriv)->getType();
4879 if (PrivTy->isVariablyModifiedType()) {
4880 // Get array size and emit VLA type.
4881 ++Idx;
4882 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
4883 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4884 const VariableArrayType *VLA =
4885 CGF.getContext().getAsVariableArrayType(PrivTy);
4886 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4887 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4888 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4889 CGF.EmitVariablyModifiedType(PrivTy);
4890 }
4891 }
4892 Scope.Privatize();
4893 IPriv = Privates.begin();
4894 const auto *ILHS = LHSExprs.begin();
4895 const auto *IRHS = RHSExprs.begin();
4896 for (const Expr *E : ReductionOps) {
4897 if ((*IPriv)->getType()->isArrayType()) {
4898 // Emit reduction for array section.
4899 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4900 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4901 EmitOMPAggregateReduction(
4902 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4903 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4904 emitReductionCombiner(CGF, E);
4905 });
4906 } else {
4907 // Emit reduction for array subscript or single variable.
4908 emitReductionCombiner(CGF, E);
4909 }
4910 ++IPriv;
4911 ++ILHS;
4912 ++IRHS;
4913 }
4914 Scope.ForceCleanup();
4915 CGF.FinishFunction();
4916 return Fn;
4917}
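// Editorial example (a sketch for illustration, not part of the original
// source): for '#pragma omp parallel for reduction(+ : x) reduction(* : y)'
// the function emitted above has roughly this shape, shown as C-like
// pseudocode of the generated IR (the name follows getReductionFuncName):
//
//   void <parent>.omp.reduction.reduction_func(void *lhs_arg, void *rhs_arg) {
//     void **lhs = (void **)lhs_arg, **rhs = (void **)rhs_arg;
//     *(int *)lhs[0]    = *(int *)lhs[0] + *(int *)rhs[0];       // RedOp<0>
//     *(double *)lhs[1] = *(double *)lhs[1] * *(double *)rhs[1]; // RedOp<1>
//   }
//
// Variably modified reduction items consume an extra slot in the arrays that
// carries the VLA size, as handled in the loop above.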
4918
4919void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4920 const Expr *ReductionOp,
4921 const Expr *PrivateRef,
4922 const DeclRefExpr *LHS,
4923 const DeclRefExpr *RHS) {
4924 if (PrivateRef->getType()->isArrayType()) {
4925 // Emit reduction for array section.
4926 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4927 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4928 EmitOMPAggregateReduction(
4929 CGF, PrivateRef->getType(), LHSVar, RHSVar,
4930 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4931 emitReductionCombiner(CGF, ReductionOp);
4932 });
4933 } else {
4934 // Emit reduction for array subscript or single variable.
4935 emitReductionCombiner(CGF, ReductionOp);
4936 }
4937}
4938
4939static std::string generateUniqueName(CodeGenModule &CGM,
4940 llvm::StringRef Prefix, const Expr *Ref);
4941
4942void CGOpenMPRuntime::emitPrivateReduction(
4943 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4944 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4945
4946 // Create a shared global variable (__shared_reduction_var) to accumulate the
4947 // final result.
4948 //
4949 // Call __kmpc_barrier to synchronize threads before initialization.
4950 //
4951 // The master thread (thread_id == 0) initializes __shared_reduction_var
4952 // with the identity value or initializer.
4953 //
4954 // Call __kmpc_barrier to synchronize before combining.
4955 // For each i:
4956 // - Thread enters critical section.
4957 // - Reads its private value from LHSExprs[i].
4958 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4959 // Privates[i]).
4960 // - Exits critical section.
4961 //
4962 // Call __kmpc_barrier after combining.
4963 //
4964 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4965 //
4966 // Final __kmpc_barrier to synchronize after broadcasting
4967 QualType PrivateType = Privates->getType();
4968 llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
4969
4970 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
4971 std::string ReductionVarNameStr;
4972 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts()))
4973 ReductionVarNameStr =
4974 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4975 else
4976 ReductionVarNameStr = "unnamed_priv_var";
4977
4978 // Create an internal shared variable
4979 std::string SharedName =
4980 CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
4981 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4982 LLVMType, ".omp.reduction." + SharedName);
4983
4984 SharedVar->setAlignment(
4985 llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
4986
4987 Address SharedResult =
4988 CGF.MakeNaturalAlignRawAddrLValue(SharedVar, PrivateType).getAddress();
4989
4990 llvm::Value *ThreadId = getThreadID(CGF, Loc);
4991 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4992 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
4993
4994 llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
4995 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
4996
4997 llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
4998 ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
4999 CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
5000
5001 CGF.EmitBlock(InitBB);
5002
5003 auto EmitSharedInit = [&]() {
5004 if (UDR) { // Check if it's a User-Defined Reduction
5005 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
5006 std::pair<llvm::Function *, llvm::Function *> FnPair =
5007 getUserDefinedReduction(UDR);
5008 llvm::Function *InitializerFn = FnPair.second;
5009 if (InitializerFn) {
5010 if (const auto *CE =
5011 dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
5012 const auto *OutDRE = cast<DeclRefExpr>(
5013 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5014 ->getSubExpr());
5015 const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
5016
5017 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5018 LocalScope.addPrivate(OutVD, SharedResult);
5019
5020 (void)LocalScope.Privatize();
5021 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
5022 CE->getCallee()->IgnoreParenImpCasts())) {
5023 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5024 CGF, OVE, RValue::get(InitializerFn));
5025 CGF.EmitIgnoredExpr(CE);
5026 } else {
5027 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5028 PrivateType.getQualifiers(),
5029 /*IsInitializer=*/true);
5030 }
5031 } else {
5032 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5033 PrivateType.getQualifiers(),
5034 /*IsInitializer=*/true);
5035 }
5036 } else {
5037 CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
5038 PrivateType.getQualifiers(),
5039 /*IsInitializer=*/true);
5040 }
5041 } else {
5042 // EmitNullInitialization handles default construction for C++ classes
5043 // and zeroing for scalars, which is a reasonable default.
5044 CGF.EmitNullInitialization(SharedResult, PrivateType);
5045 }
5046 return; // UDR initialization handled
5047 }
5048 if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
5049 if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
5050 if (const Expr *InitExpr = VD->getInit()) {
5051 CGF.EmitAnyExprToMem(InitExpr, SharedResult,
5052 PrivateType.getQualifiers(), true);
5053 return;
5054 }
5055 }
5056 }
5057 CGF.EmitNullInitialization(SharedResult, PrivateType);
5058 };
5059 EmitSharedInit();
5060 CGF.Builder.CreateBr(InitEndBB);
5061 CGF.EmitBlock(InitEndBB);
5062
5063 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5064 CGM.getModule(), OMPRTL___kmpc_barrier),
5065 BarrierArgs);
5066
5067 const Expr *ReductionOp = ReductionOps;
5068 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5069 LValue SharedLV = CGF.MakeAddrLValue(SharedResult, PrivateType);
5070 LValue LHSLV = CGF.EmitLValue(Privates);
5071
5072 auto EmitCriticalReduction = [&](auto ReductionGen) {
5073 std::string CriticalName = getName({"reduction_critical"});
5074 emitCriticalRegion(CGF, CriticalName, ReductionGen, Loc);
5075 };
5076
5077 if (CurrentUDR) {
5078 // Handle user-defined reduction.
5079 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5080 Action.Enter(CGF);
5081 std::pair<llvm::Function *, llvm::Function *> FnPair =
5082 getUserDefinedReduction(CurrentUDR);
5083 if (FnPair.first) {
5084 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) {
5085 const auto *OutDRE = cast<DeclRefExpr>(
5086 cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
5087 ->getSubExpr());
5088 const auto *InDRE = cast<DeclRefExpr>(
5089 cast<UnaryOperator>(CE->getArg(1)->IgnoreParenImpCasts())
5090 ->getSubExpr());
5091 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5092 LocalScope.addPrivate(cast<VarDecl>(OutDRE->getDecl()),
5093 SharedLV.getAddress());
5094 LocalScope.addPrivate(cast<VarDecl>(InDRE->getDecl()),
5095 LHSLV.getAddress());
5096 (void)LocalScope.Privatize();
5097 emitReductionCombiner(CGF, ReductionOp);
5098 }
5099 }
5100 };
5101 EmitCriticalReduction(ReductionGen);
5102 } else {
5103 // Handle built-in reduction operations.
5104#ifndef NDEBUG
5105 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5106 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
5107 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5108
5109 const Expr *AssignRHS = nullptr;
5110 if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
5111 if (BinOp->getOpcode() == BO_Assign)
5112 AssignRHS = BinOp->getRHS();
5113 } else if (const auto *OpCall =
5114 dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
5115 if (OpCall->getOperator() == OO_Equal)
5116 AssignRHS = OpCall->getArg(1);
5117 }
5118
5119 assert(AssignRHS &&
5120 "Private Variable Reduction : Invalid ReductionOp expression");
5121#endif
5122
5123 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5124 Action.Enter(CGF);
5125 const auto *OmpOutDRE =
5126 dyn_cast<DeclRefExpr>(LHSExprs->IgnoreParenImpCasts());
5127 const auto *OmpInDRE =
5128 dyn_cast<DeclRefExpr>(RHSExprs->IgnoreParenImpCasts());
5129 assert(
5130 OmpOutDRE && OmpInDRE &&
5131 "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
5132 const VarDecl *OmpOutVD = cast<VarDecl>(OmpOutDRE->getDecl());
5133 const VarDecl *OmpInVD = cast<VarDecl>(OmpInDRE->getDecl());
5134 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5135 LocalScope.addPrivate(OmpOutVD, SharedLV.getAddress());
5136 LocalScope.addPrivate(OmpInVD, LHSLV.getAddress());
5137 (void)LocalScope.Privatize();
5138 // Emit the actual reduction operation
5139 CGF.EmitIgnoredExpr(ReductionOp);
5140 };
5141 EmitCriticalReduction(ReductionGen);
5142 }
5143
5144 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5145 CGM.getModule(), OMPRTL___kmpc_barrier),
5146 BarrierArgs);
5147
5148 // Broadcast final result
5149 bool IsAggregate = PrivateType->isAggregateType();
5150 LValue SharedLV1 = CGF.MakeAddrLValue(SharedResult, PrivateType);
5151 llvm::Value *FinalResultVal = nullptr;
5152 Address FinalResultAddr = Address::invalid();
5153
5154 if (IsAggregate)
5155 FinalResultAddr = SharedResult;
5156 else
5157 FinalResultVal = CGF.EmitLoadOfScalar(SharedLV1, Loc);
5158
5159 LValue TargetLHSLV = CGF.EmitLValue(RHSExprs);
5160 if (IsAggregate) {
5161 CGF.EmitAggregateCopy(TargetLHSLV,
5162 CGF.MakeAddrLValue(FinalResultAddr, PrivateType),
5163 PrivateType, AggValueSlot::DoesNotOverlap, false);
5164 } else {
5165 CGF.EmitStoreOfScalar(FinalResultVal, TargetLHSLV);
5166 }
5167 // Final synchronization barrier
5168 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5169 CGM.getModule(), OMPRTL___kmpc_barrier),
5170 BarrierArgs);
5171
5172 // Combiner with original list item
5173 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5174 PrePostActionTy &Action) {
5175 Action.Enter(CGF);
5176 emitSingleReductionCombiner(CGF, ReductionOps, Privates,
5177 cast<DeclRefExpr>(LHSExprs),
5178 cast<DeclRefExpr>(RHSExprs));
5179 };
5180 EmitCriticalReduction(OriginalListCombiner);
5181}
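// Editorial summary (a sketch for illustration, not part of the original
// source): for a single scalar item the protocol emitted above behaves like
//
//   if (tid == 0) shared = <init>;                 // master initializes
//   __kmpc_barrier(...);
//   critical { shared = RedOp(shared, private); }  // every thread combines
//   __kmpc_barrier(...);
//   copy = shared;                                 // broadcast to each thread
//   __kmpc_barrier(...);
//   critical { original = RedOp(original, copy); } // fold into original item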
5182
5183void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5184 ArrayRef<const Expr *> OrgPrivates,
5185 ArrayRef<const Expr *> OrgLHSExprs,
5186 ArrayRef<const Expr *> OrgRHSExprs,
5187 ArrayRef<const Expr *> OrgReductionOps,
5188 ReductionOptionsTy Options) {
5189 if (!CGF.HaveInsertPoint())
5190 return;
5191
5192 bool WithNowait = Options.WithNowait;
5193 bool SimpleReduction = Options.SimpleReduction;
5194
5195 // Next code should be emitted for reduction:
5196 //
5197 // static kmp_critical_name lock = { 0 };
5198 //
5199 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5200 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5201 // ...
5202 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5203 // *(Type<n>-1*)rhs[<n>-1]);
5204 // }
5205 //
5206 // ...
5207 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5208 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5209 // RedList, reduce_func, &<lock>)) {
5210 // case 1:
5211 // ...
5212 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5213 // ...
5214 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5215 // break;
5216 // case 2:
5217 // ...
5218 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5219 // ...
5220 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5221 // break;
5222 // default:;
5223 // }
5224 //
5225 // if SimpleReduction is true, only the next code is generated:
5226 // ...
5227 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5228 // ...
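  // Editorial example (a sketch for illustration, not part of the original
  // source): the protocol above is what, e.g.,
  //
  //   int sum = 0;
  //   #pragma omp parallel for reduction(+ : sum)
  //   for (int i = 0; i < 100; ++i) sum += i;
  //
  // compiles to: __kmpc_reduce returns 1 when this thread should perform the
  // non-atomic combine (case 1), 2 when it should fall back to per-item
  // atomic updates (case 2), and the _nowait entry points are used when the
  // construct carries a 'nowait' clause.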
5229
5230 ASTContext &C = CGM.getContext();
5231
5232 if (SimpleReduction) {
5233 CodeGenFunction::RunCleanupsScope Scope(CGF);
5234 const auto *IPriv = OrgPrivates.begin();
5235 const auto *ILHS = OrgLHSExprs.begin();
5236 const auto *IRHS = OrgRHSExprs.begin();
5237 for (const Expr *E : OrgReductionOps) {
5238 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5239 cast<DeclRefExpr>(*IRHS));
5240 ++IPriv;
5241 ++ILHS;
5242 ++IRHS;
5243 }
5244 return;
5245 }
5246
5247 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5248 // Only keep entries where the corresponding variable is not private.
5249 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5250 FilteredRHSExprs, FilteredReductionOps;
5251 for (unsigned I : llvm::seq<unsigned>(
5252 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5253 if (!Options.IsPrivateVarReduction[I]) {
5254 FilteredPrivates.emplace_back(OrgPrivates[I]);
5255 FilteredLHSExprs.emplace_back(OrgLHSExprs[I]);
5256 FilteredRHSExprs.emplace_back(OrgRHSExprs[I]);
5257 FilteredReductionOps.emplace_back(OrgReductionOps[I]);
5258 }
5259 }
5260 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5261 // processing.
5262 ArrayRef<const Expr *> Privates = FilteredPrivates;
5263 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5264 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5265 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5266
5267 // 1. Build a list of reduction variables.
5268 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5269 auto Size = RHSExprs.size();
5270 for (const Expr *E : Privates) {
5271 if (E->getType()->isVariablyModifiedType())
5272 // Reserve place for array size.
5273 ++Size;
5274 }
5275 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5276 QualType ReductionArrayTy = C.getConstantArrayType(
5277 C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
5278 /*IndexTypeQuals=*/0);
5279 RawAddress ReductionList =
5280 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5281 const auto *IPriv = Privates.begin();
5282 unsigned Idx = 0;
5283 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5284 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5285 CGF.Builder.CreateStore(
5286 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5287 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5288 Elem);
5289 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5290 // Store array size.
5291 ++Idx;
5292 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5293 llvm::Value *Size = CGF.Builder.CreateIntCast(
5294 CGF.getVLASize(
5295 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5296 .NumElts,
5297 CGF.SizeTy, /*isSigned=*/false);
5298 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5299 Elem);
5300 }
5301 }
5302
5303 // 2. Emit reduce_func().
5304 llvm::Function *ReductionFn = emitReductionFunction(
5305 CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5306 Privates, LHSExprs, RHSExprs, ReductionOps);
5307
5308 // 3. Create static kmp_critical_name lock = { 0 };
5309 std::string Name = getName({"reduction"});
5310 llvm::Value *Lock = getCriticalRegionLock(Name);
5311
5312 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5313 // RedList, reduce_func, &<lock>);
5314 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5315 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5316 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5317 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5318 ReductionList.getPointer(), CGF.VoidPtrTy);
5319 llvm::Value *Args[] = {
5320 IdentTLoc, // ident_t *<loc>
5321 ThreadId, // i32 <gtid>
5322 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5323 ReductionArrayTySize, // size_type sizeof(RedList)
5324 RL, // void *RedList
5325 ReductionFn, // void (*) (void *, void *) <reduce_func>
5326 Lock // kmp_critical_name *&<lock>
5327 };
5328 llvm::Value *Res = CGF.EmitRuntimeCall(
5329 OMPBuilder.getOrCreateRuntimeFunction(
5330 CGM.getModule(),
5331 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5332 Args);
5333
5334 // 5. Build switch(res)
5335 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5336 llvm::SwitchInst *SwInst =
5337 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5338
5339 // 6. Build case 1:
5340 // ...
5341 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5342 // ...
5343 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5344 // break;
5345 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5346 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5347 CGF.EmitBlock(Case1BB);
5348
5349 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5350 llvm::Value *EndArgs[] = {
5351 IdentTLoc, // ident_t *<loc>
5352 ThreadId, // i32 <gtid>
5353 Lock // kmp_critical_name *&<lock>
5354 };
5355 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5356 CodeGenFunction &CGF, PrePostActionTy &Action) {
5357 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5358 const auto *IPriv = Privates.begin();
5359 const auto *ILHS = LHSExprs.begin();
5360 const auto *IRHS = RHSExprs.begin();
5361 for (const Expr *E : ReductionOps) {
5362 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5363 cast<DeclRefExpr>(*IRHS));
5364 ++IPriv;
5365 ++ILHS;
5366 ++IRHS;
5367 }
5368 };
5369 RegionCodeGenTy RCG(CodeGen);
5370 CommonActionTy Action(
5371 nullptr, {},
5372 OMPBuilder.getOrCreateRuntimeFunction(
5373 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5374 : OMPRTL___kmpc_end_reduce),
5375 EndArgs);
5376 RCG.setAction(Action);
5377 RCG(CGF);
5378
5379 CGF.EmitBranch(DefaultBB);
5380
5381 // 7. Build case 2:
5382 // ...
5383 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5384 // ...
5385 // break;
5386 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5387 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5388 CGF.EmitBlock(Case2BB);
5389
5390 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5391 CodeGenFunction &CGF, PrePostActionTy &Action) {
5392 const auto *ILHS = LHSExprs.begin();
5393 const auto *IRHS = RHSExprs.begin();
5394 const auto *IPriv = Privates.begin();
5395 for (const Expr *E : ReductionOps) {
5396 const Expr *XExpr = nullptr;
5397 const Expr *EExpr = nullptr;
5398 const Expr *UpExpr = nullptr;
5399 BinaryOperatorKind BO = BO_Comma;
5400 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5401 if (BO->getOpcode() == BO_Assign) {
5402 XExpr = BO->getLHS();
5403 UpExpr = BO->getRHS();
5404 }
5405 }
5406 // Try to emit update expression as a simple atomic.
5407 const Expr *RHSExpr = UpExpr;
5408 if (RHSExpr) {
5409 // Analyze RHS part of the whole expression.
5410 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5411 RHSExpr->IgnoreParenImpCasts())) {
5412 // If this is a conditional operator, analyze its condition for
5413 // min/max reduction operator.
5414 RHSExpr = ACO->getCond();
5415 }
5416 if (const auto *BORHS =
5417 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5418 EExpr = BORHS->getRHS();
5419 BO = BORHS->getOpcode();
5420 }
5421 }
5422 if (XExpr) {
5423 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5424 auto &&AtomicRedGen = [BO, VD,
5425 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5426 const Expr *EExpr, const Expr *UpExpr) {
5427 LValue X = CGF.EmitLValue(XExpr);
5428 RValue E;
5429 if (EExpr)
5430 E = CGF.EmitAnyExpr(EExpr);
5431 CGF.EmitOMPAtomicSimpleUpdateExpr(
5432 X, E, BO, /*IsXLHSInRHSPart=*/true,
5433 llvm::AtomicOrdering::Monotonic, Loc,
5434 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5435 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5436 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5437 CGF.emitOMPSimpleStore(
5438 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5439 VD->getType().getNonReferenceType(), Loc);
5440 PrivateScope.addPrivate(VD, LHSTemp);
5441 (void)PrivateScope.Privatize();
5442 return CGF.EmitAnyExpr(UpExpr);
5443 });
5444 };
5445 if ((*IPriv)->getType()->isArrayType()) {
5446 // Emit atomic reduction for array section.
5447 const auto *RHSVar =
5448 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5449 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5450 AtomicRedGen, XExpr, EExpr, UpExpr);
5451 } else {
5452 // Emit atomic reduction for array subscript or single variable.
5453 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5454 }
5455 } else {
5456 // Emit as a critical region.
5457 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5458 const Expr *, const Expr *) {
5459 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5460 std::string Name = RT.getName({"atomic_reduction"});
5461 RT.emitCriticalRegion(
5462 CGF, Name,
5463 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5464 Action.Enter(CGF);
5465 emitReductionCombiner(CGF, E);
5466 },
5467 Loc);
5468 };
5469 if ((*IPriv)->getType()->isArrayType()) {
5470 const auto *LHSVar =
5471 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5472 const auto *RHSVar =
5473 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5474 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5475 CritRedGen);
5476 } else {
5477 CritRedGen(CGF, nullptr, nullptr, nullptr);
5478 }
5479 }
5480 ++ILHS;
5481 ++IRHS;
5482 ++IPriv;
5483 }
5484 };
5485 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5486 if (!WithNowait) {
5487 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5488 llvm::Value *EndArgs[] = {
5489 IdentTLoc, // ident_t *<loc>
5490 ThreadId, // i32 <gtid>
5491 Lock // kmp_critical_name *&<lock>
5492 };
5493 CommonActionTy Action(nullptr, {},
5494 OMPBuilder.getOrCreateRuntimeFunction(
5495 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5496 EndArgs);
5497 AtomicRCG.setAction(Action);
5498 AtomicRCG(CGF);
5499 } else {
5500 AtomicRCG(CGF);
5501 }
5502
5503 CGF.EmitBranch(DefaultBB);
5504 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5505 assert(OrgLHSExprs.size() == OrgPrivates.size() &&
5506 "PrivateVarReduction: Privates size mismatch");
5507 assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
5508 "PrivateVarReduction: ReductionOps size mismatch");
5509 for (unsigned I : llvm::seq<unsigned>(
5510 std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
5511 if (Options.IsPrivateVarReduction[I])
5512 emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
5513 OrgRHSExprs[I], OrgReductionOps[I]);
5514 }
5515}
5516
5517/// Generates unique name for artificial threadprivate variables.
5518/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5519static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5520 const Expr *Ref) {
5521 SmallString<256> Buffer;
5522 llvm::raw_svector_ostream Out(Buffer);
5523 const clang::DeclRefExpr *DE;
5524 const VarDecl *D = ::getBaseDecl(Ref, DE);
5525 if (!D)
5526 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5527 D = D->getCanonicalDecl();
5528 std::string Name = CGM.getOpenMPRuntime().getName(
5529 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5530 Out << Prefix << Name << "_"
5531 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5532 return std::string(Out.str());
5533}
5534
5535/// Emits reduction initializer function:
5536/// \code
5537/// void @.red_init(void* %arg, void* %orig) {
5538/// %0 = bitcast void* %arg to <type>*
5539/// store <type> <init>, <type>* %0
5540/// ret void
5541/// }
5542/// \endcode
5543static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5544 SourceLocation Loc,
5545 ReductionCodeGen &RCG, unsigned N) {
5546 ASTContext &C = CGM.getContext();
5547 QualType VoidPtrTy = C.VoidPtrTy;
5548 VoidPtrTy.addRestrict();
5549 FunctionArgList Args;
5550 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5551 ImplicitParamKind::Other);
5552 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5553 ImplicitParamKind::Other);
5554 Args.emplace_back(&Param);
5555 Args.emplace_back(&ParamOrig);
5556 const auto &FnInfo =
5557 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5558 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5559 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5560 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5561 Name, &CGM.getModule());
5562 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5563 Fn->setDoesNotRecurse();
5564 CodeGenFunction CGF(CGM);
5565 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5566 QualType PrivateType = RCG.getPrivateType(N);
5567 Address PrivateAddr = CGF.EmitLoadOfPointer(
5568 CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
5569 C.getPointerType(PrivateType)->castAs<PointerType>());
5570 llvm::Value *Size = nullptr;
5571 // If the size of the reduction item is non-constant, load it from global
5572 // threadprivate variable.
5573 if (RCG.getSizes(N).second) {
5574 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5575 CGF, CGM.getContext().getSizeType(),
5576 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5577 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5578 CGM.getContext().getSizeType(), Loc);
5579 }
5580 RCG.emitAggregateType(CGF, N, Size);
5581 Address OrigAddr = Address::invalid();
5582 // If the initializer uses the initializer from the declare reduction
5583 // construct, emit a pointer to the address of the original reduction item
5584 // (required by the reduction initializer).
5585 if (RCG.usesReductionInitializer(N)) {
5586 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5587 OrigAddr = CGF.EmitLoadOfPointer(
5588 SharedAddr,
5589 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5590 }
5591 // Emit the initializer:
5592 // %0 = bitcast void* %arg to <type>*
5593 // store <type> <init>, <type>* %0
5594 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5595 [](CodeGenFunction &) { return false; });
5596 CGF.FinishFunction();
5597 return Fn;
5598}
5599
5600/// Emits reduction combiner function:
5601/// \code
5602/// void @.red_comb(void* %arg0, void* %arg1) {
5603/// %lhs = bitcast void* %arg0 to <type>*
5604/// %rhs = bitcast void* %arg1 to <type>*
5605/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5606/// store <type> %2, <type>* %lhs
5607/// ret void
5608/// }
5609/// \endcode
5610static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5611 SourceLocation Loc,
5612 ReductionCodeGen &RCG, unsigned N,
5613 const Expr *ReductionOp,
5614 const Expr *LHS, const Expr *RHS,
5615 const Expr *PrivateRef) {
5616 ASTContext &C = CGM.getContext();
5617 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5618 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5619 FunctionArgList Args;
5620 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5621 C.VoidPtrTy, ImplicitParamKind::Other);
5622 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5623 ImplicitParamKind::Other);
5624 Args.emplace_back(&ParamInOut);
5625 Args.emplace_back(&ParamIn);
5626 const auto &FnInfo =
5627 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5628 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5629 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5630 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5631 Name, &CGM.getModule());
5632 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5633 Fn->setDoesNotRecurse();
5634 CodeGenFunction CGF(CGM);
5635 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5636 llvm::Value *Size = nullptr;
5637 // If the size of the reduction item is non-constant, load it from global
5638 // threadprivate variable.
5639 if (RCG.getSizes(N).second) {
5640 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5641 CGF, CGM.getContext().getSizeType(),
5642 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5643 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5644 CGM.getContext().getSizeType(), Loc);
5645 }
5646 RCG.emitAggregateType(CGF, N, Size);
5647 // Remap lhs and rhs variables to the addresses of the function arguments.
5648 // %lhs = bitcast void* %arg0 to <type>*
5649 // %rhs = bitcast void* %arg1 to <type>*
5650 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5651 PrivateScope.addPrivate(
5652 LHSVD,
5653 // Pull out the pointer to the variable.
5654 CGF.EmitLoadOfPointer(
5655 CGF.GetAddrOfLocalVar(&ParamInOut)
5656 .withElementType(CGF.Builder.getPtrTy(0)),
5657 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5658 PrivateScope.addPrivate(
5659 RHSVD,
5660 // Pull out the pointer to the variable.
5661 CGF.EmitLoadOfPointer(
5662 CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
5663 CGF.Builder.getPtrTy(0)),
5664 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5665 PrivateScope.Privatize();
5666 // Emit the combiner body:
5667 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5668 // store <type> %2, <type>* %lhs
5669 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5670 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5671 cast<DeclRefExpr>(RHS));
5672 CGF.FinishFunction();
5673 return Fn;
5674}
5675
5676/// Emits reduction finalizer function:
5677/// \code
5678/// void @.red_fini(void* %arg) {
5679/// %0 = bitcast void* %arg to <type>*
5680/// <destroy>(<type>* %0)
5681/// ret void
5682/// }
5683/// \endcode
5684static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5685 SourceLocation Loc,
5686 ReductionCodeGen &RCG, unsigned N) {
5687 if (!RCG.needCleanups(N))
5688 return nullptr;
5689 ASTContext &C = CGM.getContext();
5690 FunctionArgList Args;
5691 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5692 ImplicitParamKind::Other);
5693 Args.emplace_back(&Param);
5694 const auto &FnInfo =
5695 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5696 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5697 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5698 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5699 Name, &CGM.getModule());
5700 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5701 Fn->setDoesNotRecurse();
5702 CodeGenFunction CGF(CGM);
5703 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5704 Address PrivateAddr = CGF.EmitLoadOfPointer(
5705 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5706 llvm::Value *Size = nullptr;
5707 // If the size of the reduction item is non-constant, load it from global
5708 // threadprivate variable.
5709 if (RCG.getSizes(N).second) {
5710 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5711 CGF, CGM.getContext().getSizeType(),
5712 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5713 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5714 CGM.getContext().getSizeType(), Loc);
5715 }
5716 RCG.emitAggregateType(CGF, N, Size);
5717 // Emit the finalizer body:
5718 // <destroy>(<type>* %0)
5719 RCG.emitCleanups(CGF, N, PrivateAddr);
5720 CGF.FinishFunction(Loc);
5721 return Fn;
5722}
5723
5724llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5725 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5726 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5727 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5728 return nullptr;
5729
5730 // Build typedef struct:
5731 // kmp_taskred_input {
5732 // void *reduce_shar; // shared reduction item
5733 // void *reduce_orig; // original reduction item used for initialization
5734 // size_t reduce_size; // size of data item
5735 // void *reduce_init; // data initialization routine
5736 // void *reduce_fini; // data finalization routine
5737 // void *reduce_comb; // data combiner routine
5738 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5739 // } kmp_taskred_input_t;
5740 ASTContext &C = CGM.getContext();
5741 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5742 RD->startDefinition();
5743 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5744 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5745 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5746 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5747 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5748 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5749 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5750 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5751 RD->completeDefinition();
5752 CanQualType RDType = C.getCanonicalTagType(RD);
5753 unsigned Size = Data.ReductionVars.size();
5754 llvm::APInt ArraySize(/*numBits=*/64, Size);
5755 QualType ArrayRDType =
5756 C.getConstantArrayType(RDType, ArraySize, nullptr,
5757 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
5758 // kmp_task_red_input_t .rd_input.[Size];
5759 RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5760 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5761 Data.ReductionCopies, Data.ReductionOps);
5762 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5763 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5764 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5765 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5766 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5767 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5768 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5769 ".rd_input.gep.");
5770 LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
5771 // ElemLVal.reduce_shar = &Shareds[Cnt];
5772 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5773 RCG.emitSharedOrigLValue(CGF, Cnt);
5774 llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
5775 CGF.EmitStoreOfScalar(Shared, SharedLVal);
5776 // ElemLVal.reduce_orig = &Origs[Cnt];
5777 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5778 llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
5779 CGF.EmitStoreOfScalar(Orig, OrigLVal);
5780 RCG.emitAggregateType(CGF, Cnt);
5781 llvm::Value *SizeValInChars;
5782 llvm::Value *SizeVal;
5783 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5784 // We use delayed creation/initialization for VLAs and array sections. It is
5785 // required because the runtime does not provide a way to pass the sizes of
5786 // VLAs/array sections to the initializer/combiner/finalizer functions.
5787 // Instead, threadprivate global variables are used to store these values and
5788 // read them back in those functions.
5789 bool DelayedCreation = !!SizeVal;
5790 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5791 /*isSigned=*/false);
5792 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5793 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5794 // ElemLVal.reduce_init = init;
5795 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5796 llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
5797 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5798 // ElemLVal.reduce_fini = fini;
5799 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5800 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5801 llvm::Value *FiniAddr =
5802 Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5803 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5804 // ElemLVal.reduce_comb = comb;
5805 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5806 llvm::Value *CombAddr = emitReduceCombFunction(
5807 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5808 RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
5809 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5810 // ElemLVal.flags = 0;
5811 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5812 if (DelayedCreation) {
5813 CGF.EmitStoreOfScalar(
5814 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5815 FlagsLVal);
5816 } else
5817 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5818 }
5819 if (Data.IsReductionWithTaskMod) {
5820 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5821 // is_ws, int num, void *data);
5822 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5823 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5824 CGM.IntTy, /*isSigned=*/true);
5825 llvm::Value *Args[] = {
5826 IdentTLoc, GTid,
5827 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5828 /*isSigned=*/true),
5829 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5830 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5831 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5832 return CGF.EmitRuntimeCall(
5833 OMPBuilder.getOrCreateRuntimeFunction(
5834 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5835 Args);
5836 }
5837 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5838 llvm::Value *Args[] = {
5839 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5840 /*isSigned=*/true),
5841 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5842 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5843 CGM.VoidPtrTy)};
5844 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5845 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5846 Args);
5847}
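// Editorial example (a sketch for illustration, not part of the original
// source): a construct such as
//
//   int r = 0;
//   #pragma omp taskgroup task_reduction(+ : r)
//   {
//   #pragma omp task in_reduction(+ : r)
//     r += 1;
//   }
//
// fills one kmp_taskred_input_t per reduction item with the reduce_init /
// reduce_fini / reduce_comb thunks emitted above and hands the array to
// __kmpc_taskred_init, or to __kmpc_taskred_modifier_init when the reduction
// carries a task modifier on a worksharing construct.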
5848
5849void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5850 SourceLocation Loc,
5851 bool IsWorksharingReduction) {
5852 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5853 // is_ws, int num, void *data);
5854 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5855 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5856 CGM.IntTy, /*isSigned=*/true);
5857 llvm::Value *Args[] = {IdentTLoc, GTid,
5858 llvm::ConstantInt::get(CGM.IntTy,
5859 IsWorksharingReduction ? 1 : 0,
5860 /*isSigned=*/true)};
5861 (void)CGF.EmitRuntimeCall(
5862 OMPBuilder.getOrCreateRuntimeFunction(
5863 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5864 Args);
5865}
5866
5867void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5868 SourceLocation Loc,
5869 ReductionCodeGen &RCG,
5870 unsigned N) {
5871 auto Sizes = RCG.getSizes(N);
5872 // Emit the threadprivate global variable if the size is non-constant
5873 // (Sizes.second != nullptr).
5874 if (Sizes.second) {
5875 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5876 /*isSigned=*/false);
5877 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5878 CGF, CGM.getContext().getSizeType(),
5879 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5880 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5881 }
5882}
5883
5884Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5885 SourceLocation Loc,
5886 llvm::Value *ReductionsPtr,
5887 LValue SharedLVal) {
5888 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5889 // *d);
5890 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5891 CGM.IntTy,
5892 /*isSigned=*/true),
5893 ReductionsPtr,
5894 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5895 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5896 return Address(
5897 CGF.EmitRuntimeCall(
5898 OMPBuilder.getOrCreateRuntimeFunction(
5899 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5900 Args),
5901 CGF.Int8Ty, SharedLVal.getAlignment());
5902}
5903
5904void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5905 const OMPTaskDataTy &Data) {
5906 if (!CGF.HaveInsertPoint())
5907 return;
5908
5909 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5910 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5911 OMPBuilder.createTaskwait(CGF.Builder);
5912 } else {
5913 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5914 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5915 auto &M = CGM.getModule();
5916 Address DependenciesArray = Address::invalid();
5917 llvm::Value *NumOfElements;
5918 std::tie(NumOfElements, DependenciesArray) =
5919 emitDependClause(CGF, Data.Dependences, Loc);
5920 if (!Data.Dependences.empty()) {
5921 llvm::Value *DepWaitTaskArgs[7];
5922 DepWaitTaskArgs[0] = UpLoc;
5923 DepWaitTaskArgs[1] = ThreadID;
5924 DepWaitTaskArgs[2] = NumOfElements;
5925 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
5926 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5927 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5928 DepWaitTaskArgs[6] =
5929 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5930
5931 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5932
5933 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5934 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5935 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5936 // kmp_int32 has_no_wait); if dependence info is specified.
5937 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5938 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5939 DepWaitTaskArgs);
5940
5941 } else {
5942
5943 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5944 // global_tid);
5945 llvm::Value *Args[] = {UpLoc, ThreadID};
5946 // Ignore return result until untied tasks are supported.
5947 CGF.EmitRuntimeCall(
5948 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5949 Args);
5950 }
5951 }
5952
5953 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5954 Region->emitUntiedSwitch(CGF);
5955}
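// Illustrative lowering (simplified): '#pragma omp taskwait depend(in : x)'
// takes the __kmpc_omp_taskwait_deps_51 path with the dependence array built
// by emitDependClause, while a plain '#pragma omp taskwait' becomes a single
// __kmpc_omp_taskwait(&loc, gtid) call (or an OpenMPIRBuilder-created
// taskwait when the IR builder is enabled and no dependences are present).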
5956
5957void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5958 OpenMPDirectiveKind InnerKind,
5959 const RegionCodeGenTy &CodeGen,
5960 bool HasCancel) {
5961 if (!CGF.HaveInsertPoint())
5962 return;
5963 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5964 InnerKind != OMPD_critical &&
5965 InnerKind != OMPD_master &&
5966 InnerKind != OMPD_masked);
5967 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5968}
5969
5970namespace {
5971enum RTCancelKind {
5972 CancelNoreq = 0,
5973 CancelParallel = 1,
5974 CancelLoop = 2,
5975 CancelSections = 3,
5976 CancelTaskgroup = 4
5977};
5978} // anonymous namespace
5979
5980static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5981 RTCancelKind CancelKind = CancelNoreq;
5982 if (CancelRegion == OMPD_parallel)
5983 CancelKind = CancelParallel;
5984 else if (CancelRegion == OMPD_for)
5985 CancelKind = CancelLoop;
5986 else if (CancelRegion == OMPD_sections)
5987 CancelKind = CancelSections;
5988 else {
5989 assert(CancelRegion == OMPD_taskgroup);
5990 CancelKind = CancelTaskgroup;
5991 }
5992 return CancelKind;
5993}
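// E.g. '#pragma omp cancel for' maps to CancelLoop (2); these values match
// the kmp_int32 cncl_kind argument expected by the cancellation entry points.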
5994
5995void CGOpenMPRuntime::emitCancellationPointCall(
5996 CodeGenFunction &CGF, SourceLocation Loc,
5997 OpenMPDirectiveKind CancelRegion) {
5998 if (!CGF.HaveInsertPoint())
5999 return;
6000 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6001 // global_tid, kmp_int32 cncl_kind);
6002 if (auto *OMPRegionInfo =
6003 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6004 // For 'cancellation point taskgroup', the task region info may not have a
6005 // cancel. This may instead happen in another adjacent task.
6006 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6007 llvm::Value *Args[] = {
6008 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6009 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6010 // Ignore return result until untied tasks are supported.
6011 llvm::Value *Result = CGF.EmitRuntimeCall(
6012 OMPBuilder.getOrCreateRuntimeFunction(
6013 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6014 Args);
6015 // if (__kmpc_cancellationpoint()) {
6016 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6017 // exit from construct;
6018 // }
6019 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6020 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6021 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6022 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6023 CGF.EmitBlock(ExitBB);
6024 if (CancelRegion == OMPD_parallel)
6025 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6026 // exit from construct;
6027 CodeGenFunction::JumpDest CancelDest =
6028 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6029 CGF.EmitBranchThroughCleanup(CancelDest);
6030 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6031 }
6032 }
6033}
6034
6035void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6036 const Expr *IfCond,
6037 OpenMPDirectiveKind CancelRegion) {
6038 if (!CGF.HaveInsertPoint())
6039 return;
6040 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6041 // kmp_int32 cncl_kind);
6042 auto &M = CGM.getModule();
6043 if (auto *OMPRegionInfo =
6044 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6045 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6046 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6047 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6048 llvm::Value *Args[] = {
6049 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6050 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6051 // Ignore return result until untied tasks are supported.
6052 llvm::Value *Result = CGF.EmitRuntimeCall(
6053 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6054 // if (__kmpc_cancel()) {
6055 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6056 // exit from construct;
6057 // }
6058 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6059 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6060 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6061 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6062 CGF.EmitBlock(ExitBB);
6063 if (CancelRegion == OMPD_parallel)
6064 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6065 // exit from construct;
6066 CodeGenFunction::JumpDest CancelDest =
6067 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6068 CGF.EmitBranchThroughCleanup(CancelDest);
6069 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6070 };
6071 if (IfCond) {
6072 emitIfClause(CGF, IfCond, ThenGen,
6073 [](CodeGenFunction &, PrePostActionTy &) {});
6074 } else {
6075 RegionCodeGenTy ThenRCG(ThenGen);
6076 ThenRCG(CGF);
6077 }
6078 }
6079}
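// Illustrative shape of the generated code (simplified) for
// '#pragma omp cancel parallel if (cond)':
//   if (cond) {
//     if (__kmpc_cancel(&loc, gtid, CancelParallel)) {
//       __kmpc_cancel_barrier(&loc, gtid); // parallel cancellation only
//       <branch through cleanups to the cancel destination>;
//     }
//   }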
6080
6081namespace {
6082/// Cleanup action for uses_allocators support.
6083class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6084 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6085
6086public:
6087 OMPUsesAllocatorsActionTy(
6088 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6089 : Allocators(Allocators) {}
6090 void Enter(CodeGenFunction &CGF) override {
6091 if (!CGF.HaveInsertPoint())
6092 return;
6093 for (const auto &AllocatorData : Allocators) {
6094 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6095 CGF, AllocatorData.first, AllocatorData.second);
6096 }
6097 }
6098 void Exit(CodeGenFunction &CGF) override {
6099 if (!CGF.HaveInsertPoint())
6100 return;
6101 for (const auto &AllocatorData : Allocators) {
6102 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6103 AllocatorData.first);
6104 }
6105 }
6106};
6107} // namespace
6108
6109void CGOpenMPRuntime::emitTargetOutlinedFunction(
6110 const OMPExecutableDirective &D, StringRef ParentName,
6111 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6112 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6113 assert(!ParentName.empty() && "Invalid target entry parent name!");
6114 HasEmittedTargetRegion = true;
6115 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6116 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6117 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6118 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6119 if (!D.AllocatorTraits)
6120 continue;
6121 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6122 }
6123 }
6124 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6125 CodeGen.setAction(UsesAllocatorAction);
6126 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6127 IsOffloadEntry, CodeGen);
6128}
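// Illustrative example ('my_alloc' and 'my_traits' are invented names):
//   omp_alloctrait_t my_traits[1] = {{omp_atk_alignment, 64}};
//   #pragma omp target uses_allocators(my_alloc(my_traits))
// The (allocator, traits) pairs collected above make the action emit
// __kmpc_init_allocator on entry to the outlined region and
// __kmpc_destroy_allocator on exit.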
6129
6130void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6131 const Expr *Allocator,
6132 const Expr *AllocatorTraits) {
6133 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6134 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6135 // Use default memspace handle.
6136 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6137 llvm::Value *NumTraits = llvm::ConstantInt::get(
6138 CGF.IntTy, cast<ConstantArrayType>(
6139 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6140 ->getSize()
6141 .getLimitedValue());
6142 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6143 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6144 AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6145 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6146 AllocatorTraitsLVal.getBaseInfo(),
6147 AllocatorTraitsLVal.getTBAAInfo());
6148 llvm::Value *Traits = Addr.emitRawPointer(CGF);
6149
6150 llvm::Value *AllocatorVal =
6151 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6152 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6153 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6154 // Store to allocator.
6155 CGF.EmitAutoVarAlloca(*cast<VarDecl>(
6156 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6157 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6158 AllocatorVal =
6159 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6160 Allocator->getType(), Allocator->getExprLoc());
6161 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6162}
6163
6164void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6165 const Expr *Allocator) {
6166 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6167 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6168 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6169 llvm::Value *AllocatorVal =
6170 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6171 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6172 CGF.getContext().VoidPtrTy,
6173 Allocator->getExprLoc());
6174 (void)CGF.EmitRuntimeCall(
6175 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6176 OMPRTL___kmpc_destroy_allocator),
6177 {ThreadId, AllocatorVal});
6178}
6179
6180void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
6181 const OMPExecutableDirective &D, CodeGenFunction &CGF,
6182 llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6183 assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
6184 "invalid default attrs structure");
6185 int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
6186 int32_t &MaxThreadsVal = Attrs.MaxThreads.front();
6187
6188 getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
6189 getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
6190 /*UpperBoundOnly=*/true);
6191
6192 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6193 for (auto *A : C->getAttrs()) {
6194 int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
6195 int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
6196 if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
6197 CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
6198 &AttrMinBlocksVal, &AttrMaxBlocksVal);
6199 else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
6200 CGM.handleAMDGPUFlatWorkGroupSizeAttr(
6201 nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
6202 &AttrMaxThreadsVal);
6203 else
6204 continue;
6205
6206 Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
6207 if (AttrMaxThreadsVal > 0)
6208 MaxThreadsVal = MaxThreadsVal > 0
6209 ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
6210 : AttrMaxThreadsVal;
6211 Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
6212 if (AttrMaxBlocksVal > 0)
6213 MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
6214 : AttrMaxBlocksVal;
6215 }
6216 }
6217}
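// Example of the clamping above (hypothetical values): if the directive
// bounds MaxThreadsVal to 256 and an ompx_attribute carries
// __launch_bounds__(128), the result is min(256, 128) = 128; attribute
// bounds left at -1 do not affect the directive-derived values.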
6218
6219void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6220 const OMPExecutableDirective &D, StringRef ParentName,
6221 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6222 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6223
6224 llvm::TargetRegionEntryInfo EntryInfo =
6225 getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);
6226
6227 CodeGenFunction CGF(CGM, true);
6228 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6229 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6230 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6231
6232 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6233 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6234 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D);
6235 };
6236
6237 cantFail(OMPBuilder.emitTargetRegionFunction(
6238 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
6239 OutlinedFnID));
6240
6241 if (!OutlinedFn)
6242 return;
6243
6244 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6245
6246 for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
6247 for (auto *A : C->getAttrs()) {
6248 if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
6249 CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
6250 }
6251 }
6252}
6253
6254/// Checks if the expression is constant or does not have non-trivial function
6255/// calls.
6256static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6257 // We can skip constant expressions.
6258 // We can skip expressions with trivial calls or simple expressions.
6259 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6260 !E->hasNonTrivialCall(Ctx)) &&
6261 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6262}
6263
6264const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6265 const Stmt *Body) {
6266 const Stmt *Child = Body->IgnoreContainers();
6267 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6268 Child = nullptr;
6269 for (const Stmt *S : C->body()) {
6270 if (const auto *E = dyn_cast<Expr>(S)) {
6271 if (isTrivial(Ctx, E))
6272 continue;
6273 }
6274 // Some of the statements can be ignored.
6275 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6276 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6277 continue;
6278 // Analyze declarations.
6279 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6280 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6281 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6282 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6283 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6284 isa<UsingDirectiveDecl>(D) ||
6285 isa<OMPDeclareReductionDecl>(D) ||
6286 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6287 return true;
6288 const auto *VD = dyn_cast<VarDecl>(D);
6289 if (!VD)
6290 return false;
6291 return VD->hasGlobalStorage() || !VD->isUsed();
6292 }))
6293 continue;
6294 }
6295 // Found multiple children - cannot get the one child only.
6296 if (Child)
6297 return nullptr;
6298 Child = S;
6299 }
6300 if (Child)
6301 Child = Child->IgnoreContainers();
6302 }
6303 return Child;
6304}
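// E.g. for a captured body '{ ; int unused; #pragma omp teams ... }' the
// null statement and the never-used local are skipped and the teams
// directive is returned as the single child; two non-ignorable statements
// make this return nullptr.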
6305
6306const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6307 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
6308 int32_t &MaxTeamsVal) {
6309
6310 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6311 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6312 "Expected target-based executable directive.");
6313 switch (DirectiveKind) {
6314 case OMPD_target: {
6315 const auto *CS = D.getInnermostCapturedStmt();
6316 const auto *Body =
6317 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6318 const Stmt *ChildStmt =
6319 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6320 if (const auto *NestedDir =
6321 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6322 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6323 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6324 const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
6325 ->getNumTeams()
6326 .front();
6327 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6328 if (auto Constant =
6329 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6330 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6331 return NumTeams;
6332 }
6333 MinTeamsVal = MaxTeamsVal = 0;
6334 return nullptr;
6335 }
6336 MinTeamsVal = MaxTeamsVal = 1;
6337 return nullptr;
6338 }
6339 // A value of -1 is used to signal that no teams region needs to be emitted.
6340 MinTeamsVal = MaxTeamsVal = -1;
6341 return nullptr;
6342 }
6343 case OMPD_target_teams_loop:
6344 case OMPD_target_teams:
6345 case OMPD_target_teams_distribute:
6346 case OMPD_target_teams_distribute_simd:
6347 case OMPD_target_teams_distribute_parallel_for:
6348 case OMPD_target_teams_distribute_parallel_for_simd: {
6349 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6350 const Expr *NumTeams =
6351 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
6352 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6353 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6354 MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
6355 return NumTeams;
6356 }
6357 MinTeamsVal = MaxTeamsVal = 0;
6358 return nullptr;
6359 }
6360 case OMPD_target_parallel:
6361 case OMPD_target_parallel_for:
6362 case OMPD_target_parallel_for_simd:
6363 case OMPD_target_parallel_loop:
6364 case OMPD_target_simd:
6365 MinTeamsVal = MaxTeamsVal = 1;
6366 return nullptr;
6367 case OMPD_parallel:
6368 case OMPD_for:
6369 case OMPD_parallel_for:
6370 case OMPD_parallel_loop:
6371 case OMPD_parallel_master:
6372 case OMPD_parallel_sections:
6373 case OMPD_for_simd:
6374 case OMPD_parallel_for_simd:
6375 case OMPD_cancel:
6376 case OMPD_cancellation_point:
6377 case OMPD_ordered:
6378 case OMPD_threadprivate:
6379 case OMPD_allocate:
6380 case OMPD_task:
6381 case OMPD_simd:
6382 case OMPD_tile:
6383 case OMPD_unroll:
6384 case OMPD_sections:
6385 case OMPD_section:
6386 case OMPD_single:
6387 case OMPD_master:
6388 case OMPD_critical:
6389 case OMPD_taskyield:
6390 case OMPD_barrier:
6391 case OMPD_taskwait:
6392 case OMPD_taskgroup:
6393 case OMPD_atomic:
6394 case OMPD_flush:
6395 case OMPD_depobj:
6396 case OMPD_scan:
6397 case OMPD_teams:
6398 case OMPD_target_data:
6399 case OMPD_target_exit_data:
6400 case OMPD_target_enter_data:
6401 case OMPD_distribute:
6402 case OMPD_distribute_simd:
6403 case OMPD_distribute_parallel_for:
6404 case OMPD_distribute_parallel_for_simd:
6405 case OMPD_teams_distribute:
6406 case OMPD_teams_distribute_simd:
6407 case OMPD_teams_distribute_parallel_for:
6408 case OMPD_teams_distribute_parallel_for_simd:
6409 case OMPD_target_update:
6410 case OMPD_declare_simd:
6411 case OMPD_declare_variant:
6412 case OMPD_begin_declare_variant:
6413 case OMPD_end_declare_variant:
6414 case OMPD_declare_target:
6415 case OMPD_end_declare_target:
6416 case OMPD_declare_reduction:
6417 case OMPD_declare_mapper:
6418 case OMPD_taskloop:
6419 case OMPD_taskloop_simd:
6420 case OMPD_master_taskloop:
6421 case OMPD_master_taskloop_simd:
6422 case OMPD_parallel_master_taskloop:
6423 case OMPD_parallel_master_taskloop_simd:
6424 case OMPD_requires:
6425 case OMPD_metadirective:
6426 case OMPD_unknown:
6427 break;
6428 default:
6429 break;
6430 }
6431 llvm_unreachable("Unexpected directive kind.");
6432}
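// Illustrative results: '#pragma omp target teams num_teams(8)' returns the
// num_teams expression and, if it folds to a constant, sets
// MinTeamsVal = MaxTeamsVal = 8; '#pragma omp target parallel' pins both to
// 1; a bare '#pragma omp target' with no nested teams reports -1.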
6433
6434llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6435 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6436 assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
6437 "Clauses associated with the teams directive expected to be emitted "
6438 "only for the host!");
6439 CGBuilderTy &Bld = CGF.Builder;
6440 int32_t MinNT = -1, MaxNT = -1;
6441 const Expr *NumTeams =
6442 getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
6443 if (NumTeams != nullptr) {
6444 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6445
6446 switch (DirectiveKind) {
6447 case OMPD_target: {
6448 const auto *CS = D.getInnermostCapturedStmt();
6449 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6450 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6451 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6452 /*IgnoreResultAssign*/ true);
6453 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6454 /*isSigned=*/true);
6455 }
6456 case OMPD_target_teams:
6457 case OMPD_target_teams_distribute:
6458 case OMPD_target_teams_distribute_simd:
6459 case OMPD_target_teams_distribute_parallel_for:
6460 case OMPD_target_teams_distribute_parallel_for_simd: {
6461 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6462 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6463 /*IgnoreResultAssign*/ true);
6464 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6465 /*isSigned=*/true);
6466 }
6467 default:
6468 break;
6469 }
6470 }
6471
6472 assert(MinNT == MaxNT && "Num teams ranges require handling here.");
6473 return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
6474}
6475
6476/// Check for a num threads constant value (stored in \p UpperBound), or
6477/// expression (stored in \p E). If the value is conditional (via an if-clause),
6478/// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, the
6479/// corresponding expression evaluation is not performed.
6480static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6481 const Expr **E, int32_t &UpperBound,
6482 bool UpperBoundOnly, llvm::Value **CondVal) {
6483 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6484 CGF.getContext(), CS->getCapturedStmt());
6485 const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6486 if (!Dir)
6487 return;
6488
6489 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6490 // Handle the if clause. If an if clause is present, the number of threads is
6491 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6492 if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
6493 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6494 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6495 const OMPIfClause *IfClause = nullptr;
6496 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6497 if (C->getNameModifier() == OMPD_unknown ||
6498 C->getNameModifier() == OMPD_parallel) {
6499 IfClause = C;
6500 break;
6501 }
6502 }
6503 if (IfClause) {
6504 const Expr *CondExpr = IfClause->getCondition();
6505 bool Result;
6506 if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6507 if (!Result) {
6508 UpperBound = 1;
6509 return;
6510 }
6511 } else {
6512 CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
6513 if (const auto *PreInit =
6514 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6515 for (const auto *I : PreInit->decls()) {
6516 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6517 CGF.EmitVarDecl(cast<VarDecl>(*I));
6518 } else {
6519 CodeGenFunction::AutoVarEmission Emission =
6520 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6521 CGF.EmitAutoVarCleanups(Emission);
6522 }
6523 }
6524 *CondVal = CGF.EvaluateExprAsBool(CondExpr);
6525 }
6526 }
6527 }
6528 }
6529 // Check the value of the num_threads clause iff the if clause was not
6530 // specified or does not evaluate to false.
6531 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6532 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6533 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6534 const auto *NumThreadsClause =
6535 Dir->getSingleClause<OMPNumThreadsClause>();
6536 const Expr *NTExpr = NumThreadsClause->getNumThreads();
6537 if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
6538 if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
6539 UpperBound =
6540 UpperBound
6541 ? Constant->getZExtValue()
6542 : std::min(UpperBound,
6543 static_cast<int32_t>(Constant->getZExtValue()));
6544 // If we haven't found an upper bound, remember we saw a thread limiting
6545 // clause.
6546 if (UpperBound == -1)
6547 UpperBound = 0;
6548 if (!E)
6549 return;
6550 CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
6551 if (const auto *PreInit =
6552 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6553 for (const auto *I : PreInit->decls()) {
6554 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6555 CGF.EmitVarDecl(cast<VarDecl>(*I));
6556 } else {
6557 CodeGenFunction::AutoVarEmission Emission =
6558 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6559 CGF.EmitAutoVarCleanups(Emission);
6560 }
6561 }
6562 }
6563 *E = NTExpr;
6564 }
6565 return;
6566 }
6567 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6568 UpperBound = 1;
6569}
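// Example of the rule above: for 'parallel if(c) num_threads(4)' nested in a
// target region, the emitted thread count is c ? 4 : 1; if 'c' folds to
// false at compile time, UpperBound is simply pinned to 1.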
6570
6571const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6572 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6573 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6574 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6575 "Clauses associated with the teams directive expected to be emitted "
6576 "only for the host!");
6577 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6578 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6579 "Expected target-based executable directive.");
6580
6581 const Expr *NT = nullptr;
6582 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6583
6584 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6585 if (E->isIntegerConstantExpr(CGF.getContext())) {
6586 if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
6587 UpperBound = UpperBound ? Constant->getZExtValue()
6588 : std::min(UpperBound,
6589 int32_t(Constant->getZExtValue()));
6590 }
6591 // If we haven't found an upper bound, remember we saw a thread limiting
6592 // clause.
6593 if (UpperBound == -1)
6594 UpperBound = 0;
6595 if (EPtr)
6596 *EPtr = E;
6597 };
6598
6599 auto ReturnSequential = [&]() {
6600 UpperBound = 1;
6601 return NT;
6602 };
6603
6604 switch (DirectiveKind) {
6605 case OMPD_target: {
6606 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6607 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6608 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6609 CGF.getContext(), CS->getCapturedStmt());
6610 // TODO: The standard is not clear how to resolve two thread limit clauses,
6611 // let's pick the teams one if it's present, otherwise the target one.
6612 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6613 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6614 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6615 ThreadLimitClause = TLC;
6616 if (ThreadLimitExpr) {
6617 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6618 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6619 CodeGenFunction::LexicalScope ThreadLimitScope(
6620 CGF,
6621 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6622 if (const auto *PreInit =
6623 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6624 for (const auto *I : PreInit->decls()) {
6625 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6626 CGF.EmitVarDecl(cast<VarDecl>(*I));
6627 } else {
6628 CodeGenFunction::AutoVarEmission Emission =
6629 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6630 CGF.EmitAutoVarCleanups(Emission);
6631 }
6632 }
6633 }
6634 }
6635 }
6636 }
6637 if (ThreadLimitClause)
6638 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6639 ThreadLimitExpr);
6640 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6641 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6642 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6643 CS = Dir->getInnermostCapturedStmt();
6644 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6645 CGF.getContext(), CS->getCapturedStmt());
6646 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6647 }
6648 if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6649 CS = Dir->getInnermostCapturedStmt();
6650 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6651 } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6652 return ReturnSequential();
6653 }
6654 return NT;
6655 }
6656 case OMPD_target_teams: {
6657 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6658 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6659 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6660 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6661 ThreadLimitExpr);
6662 }
6663 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6664 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6665 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6666 CGF.getContext(), CS->getCapturedStmt());
6667 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6668 if (Dir->getDirectiveKind() == OMPD_distribute) {
6669 CS = Dir->getInnermostCapturedStmt();
6670 getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
6671 }
6672 }
6673 return NT;
6674 }
6675 case OMPD_target_teams_distribute:
6676 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6677 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6678 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6679 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6680 ThreadLimitExpr);
6681 }
6682 getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
6683 UpperBoundOnly, CondVal);
6684 return NT;
6685 case OMPD_target_teams_loop:
6686 case OMPD_target_parallel_loop:
6687 case OMPD_target_parallel:
6688 case OMPD_target_parallel_for:
6689 case OMPD_target_parallel_for_simd:
6690 case OMPD_target_teams_distribute_parallel_for:
6691 case OMPD_target_teams_distribute_parallel_for_simd: {
6692 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6693 const OMPIfClause *IfClause = nullptr;
6694 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6695 if (C->getNameModifier() == OMPD_unknown ||
6696 C->getNameModifier() == OMPD_parallel) {
6697 IfClause = C;
6698 break;
6699 }
6700 }
6701 if (IfClause) {
6702 const Expr *Cond = IfClause->getCondition();
6703 bool Result;
6704 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6705 if (!Result)
6706 return ReturnSequential();
6707 } else {
6708 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6709 *CondVal = CGF.EvaluateExprAsBool(Cond);
6710 }
6711 }
6712 }
6713 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6714 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6715 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6716 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6717 ThreadLimitExpr);
6718 }
6719 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6720 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6721 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6722 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6723 return NumThreadsClause->getNumThreads();
6724 }
6725 return NT;
6726 }
6727 case OMPD_target_teams_distribute_simd:
6728 case OMPD_target_simd:
6729 return ReturnSequential();
6730 default:
6731 break;
6732 }
6733 llvm_unreachable("Unsupported directive kind.");
6734}
6735
6736llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6737 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6738 llvm::Value *NumThreadsVal = nullptr;
6739 llvm::Value *CondVal = nullptr;
6740 llvm::Value *ThreadLimitVal = nullptr;
6741 const Expr *ThreadLimitExpr = nullptr;
6742 int32_t UpperBound = -1;
6743
6744 const Expr *NT = getNumThreadsExprForTargetDirective(
6745 CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
6746 &ThreadLimitExpr);
6747
6748 // Thread limit expressions are used below, emit them.
6749 if (ThreadLimitExpr) {
6750 ThreadLimitVal =
6751 CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
6752 ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
6753 /*isSigned=*/false);
6754 }
6755
6756 // Generate the num threads expression.
6757 if (UpperBound == 1) {
6758 NumThreadsVal = CGF.Builder.getInt32(UpperBound);
6759 } else if (NT) {
6760 NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
6761 NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
6762 /*isSigned=*/false);
6763 } else if (ThreadLimitVal) {
6764 // If we do not have a num threads value but a thread limit, replace the
6765 // former with the latter. We have already handled the thread limit expression.
6766 NumThreadsVal = ThreadLimitVal;
6767 ThreadLimitVal = nullptr;
6768 } else {
6769 // Default to "0" which means runtime choice.
6770 assert(!ThreadLimitVal && "Default not applicable with thread limit value");
6771 NumThreadsVal = CGF.Builder.getInt32(0);
6772 }
6773
6774 // Handle the if clause. If an if clause is present, the number of threads is
6775 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6776 if (CondVal) {
6778 NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
6779 CGF.Builder.getInt32(1));
6780 }
6781
6782 // If both the thread limit and the num threads expression were present,
6783 // take the minimum.
6784 if (ThreadLimitVal) {
6785 NumThreadsVal = CGF.Builder.CreateSelect(
6786 CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
6787 ThreadLimitVal, NumThreadsVal);
6788 }
6789
6790 return NumThreadsVal;
6791}
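// Putting it together, a rough IR sketch for
// '#pragma omp target parallel if(c) num_threads(n) thread_limit(t)':
//   %nt  = select i1 %c, i32 %n, i32 1
//   %lt  = icmp ult i32 %t, %nt
//   %res = select i1 %lt, i32 %t, i32 %nt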
6792
6793namespace {
6794LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6795
6796// Utility to handle information from clauses associated with a given
6797// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6798// It provides a convenient interface to obtain the information and generate
6799// code for that information.
6800class MappableExprsHandler {
6801public:
6802 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6803 static unsigned getFlagMemberOffset() {
6804 unsigned Offset = 0;
6805 for (uint64_t Remain =
6806 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6807 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6808 !(Remain & 1); Remain = Remain >> 1)
6809 Offset++;
6810 return Offset;
6811 }
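// E.g. with OMP_MAP_MEMBER_OF == 0xffff000000000000 this returns 48: the
// bit position where the 16-bit MEMBER_OF field starts within the 64-bit
// map flags.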
6812
6813 /// Class that holds debugging information for a data mapping to be passed to
6814 /// the runtime library.
6815 class MappingExprInfo {
6816 /// The variable declaration used for the data mapping.
6817 const ValueDecl *MapDecl = nullptr;
6818 /// The original expression used in the map clause, or null if there is
6819 /// none.
6820 const Expr *MapExpr = nullptr;
6821
6822 public:
6823 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6824 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6825
6826 const ValueDecl *getMapDecl() const { return MapDecl; }
6827 const Expr *getMapExpr() const { return MapExpr; }
6828 };
6829
6830 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6831 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6832 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6833 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6834 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6835 using MapNonContiguousArrayTy =
6836 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6837 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6838 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6839 using MapData =
6840 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
6841 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
6842 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
6843 using MapDataArrayTy = SmallVector<MapData, 4>;
6844
6845 /// This structure contains combined information generated for mappable
6846 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6847 /// mappers, and non-contiguous information.
6848 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6849 MapExprsArrayTy Exprs;
6850 MapValueDeclsArrayTy Mappers;
6851 MapValueDeclsArrayTy DevicePtrDecls;
6852
6853 /// Append arrays in \a CurInfo.
6854 void append(MapCombinedInfoTy &CurInfo) {
6855 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6856 DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
6857 CurInfo.DevicePtrDecls.end());
6858 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6859 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6860 }
6861 };
6862
6863 /// Map between a struct and its lowest & highest elements which have been
6864 /// mapped.
6865 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6866 /// HE(FieldIndex, Pointer)}
6867 struct StructRangeInfoTy {
6868 MapCombinedInfoTy PreliminaryMapData;
6869 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6870 0, Address::invalid()};
6871 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6872 0, Address::invalid()};
6873 Address Base = Address::invalid();
6874 Address LB = Address::invalid();
6875 bool IsArraySection = false;
6876 bool HasCompleteRecord = false;
6877 };
6878
6879private:
6880 /// Information about a mappable expression and how its device pointer has to be returned.
6881 struct MapInfo {
6882 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6883 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6884 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6885 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6886 bool ReturnDevicePointer = false;
6887 bool IsImplicit = false;
6888 const ValueDecl *Mapper = nullptr;
6889 const Expr *VarRef = nullptr;
6890 bool ForDeviceAddr = false;
6891
6892 MapInfo() = default;
6893 MapInfo(
6894 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6895 OpenMPMapClauseKind MapType,
6896 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6897 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6898 bool ReturnDevicePointer, bool IsImplicit,
6899 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6900 bool ForDeviceAddr = false)
6901 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6902 MotionModifiers(MotionModifiers),
6903 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6904 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6905 };
6906
6907 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6908 /// member and there is no map information about it, then emission of that
6909 /// entry is deferred until the whole struct has been processed.
6910 struct DeferredDevicePtrEntryTy {
6911 const Expr *IE = nullptr;
6912 const ValueDecl *VD = nullptr;
6913 bool ForDeviceAddr = false;
6914
6915 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6916 bool ForDeviceAddr)
6917 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6918 };
6919
6920 /// The target directive from which the mappable clauses were extracted. It
6921 /// is either an executable directive or a user-defined mapper directive.
6922 llvm::PointerUnion<const OMPExecutableDirective *,
6923 const OMPDeclareMapperDecl *>
6924 CurDir;
6925
6926 /// Function the directive is being generated for.
6927 CodeGenFunction &CGF;
6928
6929 /// Set of all first private variables in the current directive.
6930 /// bool data is set to true if the variable is implicitly marked as
6931 /// firstprivate, false otherwise.
6932 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6933
6934 /// Map between device pointer declarations and their expression components.
6935 /// The key value for declarations in 'this' is null.
6936 llvm::DenseMap<
6937 const ValueDecl *,
6938 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6939 DevPointersMap;
6940
6941 /// Map between device addr declarations and their expression components.
6942 /// The key value for declarations in 'this' is null.
6943 llvm::DenseMap<
6944 const ValueDecl *,
6945 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6946 HasDevAddrsMap;
6947
6948 /// Map between lambda declarations and their map type.
6949 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6950
6951 llvm::Value *getExprTypeSize(const Expr *E) const {
6952 QualType ExprTy = E->getType().getCanonicalType();
6953
6954 // Calculate the size for array shaping expression.
6955 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6956 llvm::Value *Size =
6957 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6958 for (const Expr *SE : OAE->getDimensions()) {
6959 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6960 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6961 CGF.getContext().getSizeType(),
6962 SE->getExprLoc());
6963 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6964 }
6965 return Size;
6966 }
6967
6968 // Reference types are ignored for mapping purposes.
6969 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6970 ExprTy = RefTy->getPointeeType().getCanonicalType();
6971
6972 // Given that an array section is considered a built-in type, we need to
6973 // do the calculation based on the length of the section instead of relying
6974 // on CGF.getTypeSize(E->getType()).
6975 if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
6976 QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
6977 OAE->getBase()->IgnoreParenImpCasts())
6978 .getCanonicalType();
6979
6980 // If there is no length associated with the expression and the lower bound
6981 // is not specified either, that means we are using the whole length of the
6982 // base.
6983 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
6984 !OAE->getLowerBound())
6985 return CGF.getTypeSize(BaseTy);
6986
6987 llvm::Value *ElemSize;
6988 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
6989 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6990 } else {
6991 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6992 assert(ATy && "Expecting array type if not a pointer type.");
6993 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6994 }
6995
6996 // If we don't have a length at this point, that is because we have an
6997 // array section with a single element.
6998 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
6999 return ElemSize;
7000
7001 if (const Expr *LenExpr = OAE->getLength()) {
7002 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7003 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7004 CGF.getContext().getSizeType(),
7005 LenExpr->getExprLoc());
7006 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7007 }
7008 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7009 OAE->getLowerBound() && "expected array_section[lb:].");
7010 // Size = sizetype - lb * elemtype;
7011 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7012 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7013 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7014 CGF.getContext().getSizeType(),
7015 OAE->getLowerBound()->getExprLoc());
7016 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7017 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7018 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7019 LengthVal = CGF.Builder.CreateSelect(
7020 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7021 return LengthVal;
7022 }
7023 return CGF.getTypeSize(ExprTy);
7024 }
7025
7026 /// Return the corresponding bits for a given map clause modifier. Add
7027 /// a flag marking the map as a pointer if requested. Add a flag marking the
7028 /// map as the first one of a series of maps that relate to the same map
7029 /// expression.
7030 OpenMPOffloadMappingFlags getMapTypeBits(
7031 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7032 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7033 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7034 OpenMPOffloadMappingFlags Bits =
7035 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7036 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7037 switch (MapType) {
7038 case OMPC_MAP_alloc:
7039 case OMPC_MAP_release:
7040 // alloc and release are the default behavior in the runtime library, i.e.
7041 // if we don't pass any bits, alloc/release is what the runtime is
7042 // going to do. Therefore, we don't need to signal anything for these two
7043 // type modifiers.
7044 break;
7045 case OMPC_MAP_to:
7046 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7047 break;
7048 case OMPC_MAP_from:
7049 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7050 break;
7051 case OMPC_MAP_tofrom:
7052 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7053 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7054 break;
7055 case OMPC_MAP_delete:
7056 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7057 break;
7058 case OMPC_MAP_unknown:
7059 llvm_unreachable("Unexpected map type!");
7060 }
7061 if (AddPtrFlag)
7062 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7063 if (AddIsTargetParamFlag)
7064 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7065 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7066 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7067 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7068 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7069 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7070 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7071 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7072 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7073 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7074 if (IsNonContiguous)
7075 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7076 return Bits;
7077 }
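// E.g. 'map(always, tofrom: x)' yields OMP_MAP_TO | OMP_MAP_FROM |
// OMP_MAP_ALWAYS, with OMP_MAP_TARGET_PARAM added when x is passed as a
// kernel argument (AddIsTargetParamFlag).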
7078
7079 /// Return true if the provided expression is a final array section. A
7080 /// final array section is one whose length can't be proved to be one.
7081 bool isFinalArraySectionExpression(const Expr *E) const {
7082 const auto *OASE = dyn_cast<ArraySectionExpr>(E);
7083
7084 // It is not an array section and therefore not a unity-size one.
7085 if (!OASE)
7086 return false;
7087
7088 // An array section with no colon always refers to a single element.
7089 if (OASE->getColonLocFirst().isInvalid())
7090 return false;
7091
7092 const Expr *Length = OASE->getLength();
7093
7094 // If we don't have a length we have to check if the array has size 1
7095 // for this dimension. Also, we should always expect a length if the
7096 // base type is a pointer.
7097 if (!Length) {
7098 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7099 OASE->getBase()->IgnoreParenImpCasts())
7100 .getCanonicalType();
7101 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7102 return ATy->getSExtSize() != 1;
7103 // If we don't have a constant dimension length, we have to consider
7104 // the current section as having any size, so it is not necessarily
7105 // unitary. If it happens to be unity size, that's the user's fault.
7106 return true;
7107 }
7108
7109 // Check if the length evaluates to 1.
7110 Expr::EvalResult Result;
7111 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7112 return true; // Could have a size greater than 1.
7113
7114 llvm::APSInt ConstLength = Result.Val.getInt();
7115 return ConstLength.getSExtValue() != 1;
7116 }
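// E.g. 'a[0:1]' is not final (provably unit length), 'a[0:n]' is final
// (length not provably one), and 'a[3]' is not an array section at all.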
7117
7118 /// A helper class to copy structures with overlapped elements, i.e. those
7119 /// which have mappings of both "s" and "s.mem". Consecutive elements that
7120 /// are not explicitly copied have mapping nodes synthesized for them,
7121 /// taking care to avoid generating zero-sized copies.
7122 class CopyOverlappedEntryGaps {
7123 CodeGenFunction &CGF;
7124 MapCombinedInfoTy &CombinedInfo;
7125 OpenMPOffloadMappingFlags Flags = OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7126 const ValueDecl *MapDecl = nullptr;
7127 const Expr *MapExpr = nullptr;
7128 Address BP = Address::invalid();
7129 bool IsNonContiguous = false;
7130 uint64_t DimSize = 0;
7131 // These elements track the position as the struct is iterated over
7132 // (in order of increasing element address).
7133 const RecordDecl *LastParent = nullptr;
7134 uint64_t Cursor = 0;
7135 unsigned LastIndex = -1u;
7136 Address LB = Address::invalid();
7137
7138 public:
7139 CopyOverlappedEntryGaps(CodeGenFunction &CGF,
7140 MapCombinedInfoTy &CombinedInfo,
7141 OpenMPOffloadMappingFlags Flags,
7142 const ValueDecl *MapDecl, const Expr *MapExpr,
7143 Address BP, Address LB, bool IsNonContiguous,
7144 uint64_t DimSize)
7145 : CGF(CGF), CombinedInfo(CombinedInfo), Flags(Flags), MapDecl(MapDecl),
7146 MapExpr(MapExpr), BP(BP), IsNonContiguous(IsNonContiguous),
7147 DimSize(DimSize), LB(LB) {}
7148
7149 void processField(
7150 const OMPClauseMappableExprCommon::MappableComponent &MC,
7151 const FieldDecl *FD,
7152 llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)>
7153 EmitMemberExprBase) {
7154 const RecordDecl *RD = FD->getParent();
7155 const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7156 uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7157 uint64_t FieldSize =
7158 CGF.getContext().getTypeSize(FD->getType());
7159 Address ComponentLB = Address::invalid();
7160
7161 if (FD->getType()->isLValueReferenceType()) {
7162 const auto *ME = cast<MemberExpr>(MC.getAssociatedExpression());
7163 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7164 ComponentLB =
7165 CGF.EmitLValueForFieldInitialization(BaseLVal, FD).getAddress();
7166 } else {
7167 ComponentLB =
7168 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()).getAddress();
7169 }
7170
7171 if (!LastParent)
7172 LastParent = RD;
7173 if (FD->getParent() == LastParent) {
7174 if (FD->getFieldIndex() != LastIndex + 1)
7175 copyUntilField(FD, ComponentLB);
7176 } else {
7177 LastParent = FD->getParent();
7178 if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7179 copyUntilField(FD, ComponentLB);
7180 }
7181 Cursor = FieldOffset + FieldSize;
7182 LastIndex = FD->getFieldIndex();
7183 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7184 }
7185
7186 void copyUntilField(const FieldDecl *FD, Address ComponentLB) {
7187 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7188 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7189 llvm::Value *Size =
7190 CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, LBPtr);
7191 copySizedChunk(LBPtr, Size);
7192 }
7193
7194 void copyUntilEnd(Address HB) {
7195 if (LastParent) {
7196 const ASTRecordLayout &RL =
7197 CGF.getContext().getASTRecordLayout(LastParent);
7198 if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7199 return;
7200 }
7201 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7202 llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7203 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7204 LBPtr);
7205 copySizedChunk(LBPtr, Size);
7206 }
7207
7208 void copySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7209 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7210 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7211 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7212 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7213 CombinedInfo.Pointers.push_back(Base);
7214 CombinedInfo.Sizes.push_back(
7215 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7216 CombinedInfo.Types.push_back(Flags);
7217 CombinedInfo.Mappers.push_back(nullptr);
7218 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7219 }
7220 };
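// Illustrative use: for 'struct T { int a; int b; int c; } s;' mapped with
// 'map(tofrom: s) map(from: s.b)', s.b is an overlapped element, so the copy
// of 's' is synthesized as two gap chunks, [&s.a, &s.b) and
// [&s.b + 1, end of s), leaving s.b to its own 'from' entry.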
7221
7222 /// Generate the base pointers, section pointers, sizes, map type bits, and
7223 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7224 /// map type, map or motion modifiers, and expression components.
7225 /// \a IsFirstComponent should be set to true if the provided set of
7226 /// components is the first associated with a capture.
7227 void generateInfoForComponentList(
7228 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7229 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7230 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7231 MapCombinedInfoTy &CombinedInfo,
7232 MapCombinedInfoTy &StructBaseCombinedInfo,
7233 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7234 bool IsImplicit, bool GenerateAllInfoForClauses,
7235 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7236 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7237 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7238 OverlappedElements = {},
7239 bool AreBothBasePtrAndPteeMapped = false) const {
7240 // The following summarizes what has to be generated for each map and the
7241 // types below. The generated information is expressed in this order:
7242 // base pointer, section pointer, size, flags
7243 // (to add to the ones that come from the map type and modifier).
7244 //
7245 // double d;
7246 // int i[100];
7247 // float *p;
7248 // int **a = &i;
7249 //
7250 // struct S1 {
7251 // int i;
7252 // float f[50];
7253 // }
7254 // struct S2 {
7255 // int i;
7256 // float f[50];
7257 // S1 s;
7258 // double *p;
7259 // struct S2 *ps;
7260 // int &ref;
7261 // }
7262 // S2 s;
7263 // S2 *ps;
7264 //
7265 // map(d)
7266 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7267 //
7268 // map(i)
7269 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7270 //
7271 // map(i[1:23])
7272 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7273 //
7274 // map(p)
7275 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7276 //
7277 // map(p[1:24])
7278 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7279 // in unified shared memory mode or for local pointers
7280 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7281 //
7282 // map((*a)[0:3])
7283 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7284 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7285 //
7286 // map(**a)
7287 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7288 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7289 //
7290 // map(s)
7291 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7292 //
7293 // map(s.i)
7294 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7295 //
7296 // map(s.s.f)
7297 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7298 //
7299 // map(s.p)
7300 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7301 //
7302 // map(to: s.p[:22])
7303 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7304 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7305 // &(s.p), &(s.p[0]), 22*sizeof(double),
7306 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7307 // (*) alloc space for struct members, only this is a target parameter
7308 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7309 // optimizes this entry out, same in the examples below)
7310 // (***) map the pointee (map: to)
7311 //
7312 // map(to: s.ref)
7313 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7314 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7315 // (*) alloc space for struct members, only this is a target parameter
7316 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7317 // optimizes this entry out, same in the examples below)
7318 // (***) map the pointee (map: to)
7319 //
7320 // map(s.ps)
7321 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7322 //
7323 // map(from: s.ps->s.i)
7324 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7325 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7326 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7327 //
7328 // map(to: s.ps->ps)
7329 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7330 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7331 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7332 //
7333 // map(s.ps->ps->ps)
7334 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7335 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7336 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7337 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7338 //
7339 // map(to: s.ps->ps->s.f[:22])
7340 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7341 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7342 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7343 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7344 //
7345 // map(ps)
7346 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7347 //
7348 // map(ps->i)
7349 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7350 //
7351 // map(ps->s.f)
7352 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7353 //
7354 // map(from: ps->p)
7355 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7356 //
7357 // map(to: ps->p[:22])
7358 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7359 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7360 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7361 //
7362 // map(ps->ps)
7363 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7364 //
7365 // map(from: ps->ps->s.i)
7366 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7367 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7368 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7369 //
7370 // map(from: ps->ps->ps)
7371 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7372 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7373 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7374 //
7375 // map(ps->ps->ps->ps)
7376 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7377 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7378 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7379 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7380 //
7381 // map(to: ps->ps->ps->s.f[:22])
7382 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7383 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7384 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7385 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7386 //
7387 // map(to: s.f[:22]) map(from: s.p[:33])
7388 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7389 // sizeof(double*) (**), TARGET_PARAM
7390 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7391 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7392 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7393 // (*) allocate contiguous space needed to fit all mapped members even if
7394 // we allocate space for members not mapped (in this example,
7395 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7396 // them as well because they fall between &s.f[0] and &s.p)
7397 //
7398 // map(from: s.f[:22]) map(to: ps->p[:33])
7399 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7400 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7401 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7402 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7403 // (*) the struct this entry pertains to is the 2nd element in the list of
7404 // arguments, hence MEMBER_OF(2)
7405 //
7406 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7407 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7408 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7409 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7410 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7411 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7412 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7413 // (*) the struct this entry pertains to is the 4th element in the list
7414 // of arguments, hence MEMBER_OF(4)
7415 //
7416 // map(p, p[:100])
7417 // ===> map(p[:100])
7418 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7419
7420 // Track if the map information being generated is the first for a capture.
7421 bool IsCaptureFirstInfo = IsFirstComponentList;
7422 // When the variable is on a declare target link or in a to clause with
7423 // unified memory, a reference is needed to hold the host/device address
7424 // of the variable.
7425 bool RequiresReference = false;
7426
7427 // Scan the components from the base to the complete expression.
7428 auto CI = Components.rbegin();
7429 auto CE = Components.rend();
7430 auto I = CI;
7431
7432 // Track if the map information being generated is the first for a list of
7433 // components.
7434 bool IsExpressionFirstInfo = true;
7435 bool FirstPointerInComplexData = false;
7436 Address BP = Address::invalid();
7437 const Expr *AssocExpr = I->getAssociatedExpression();
7438 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7439 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7440 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7441
7442 if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
7443 return;
7444 if (isa<MemberExpr>(AssocExpr)) {
7445 // The base is the 'this' pointer. The content of the pointer is going
7446 // to be the base of the field being mapped.
7447 BP = CGF.LoadCXXThisAddress();
7448 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7449 (OASE &&
7450 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7451 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7452 } else if (OAShE &&
7453 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7454 BP = Address(
7455 CGF.EmitScalarExpr(OAShE->getBase()),
7456 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7457 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7458 } else {
7459 // The base is the reference to the variable.
7460 // BP = &Var.
7461 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7462 if (const auto *VD =
7463 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7464 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7465 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7466 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7467 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7468 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7469 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7470 RequiresReference = true;
7471 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7472 }
7473 }
7474 }
7475
7476 // If the variable is a pointer and is being dereferenced (i.e. is not
7477 // the last component), the base has to be the pointer itself, not its
7478 // reference. References are ignored for mapping purposes.
7479 QualType Ty =
7480 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7481 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7482 // No need to generate individual map information for the pointer; it
7483 // can be associated with the combined storage if shared memory mode is
7484 // active or the base declaration is not a global variable.
7485 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7486 if (!AreBothBasePtrAndPteeMapped &&
7487 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7488 !VD || VD->hasLocalStorage()))
7489 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7490 else
7491 FirstPointerInComplexData = true;
7492 ++I;
7493 }
7494 }
7495
7496 // Track whether a component of the list should be marked as MEMBER_OF some
7497 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7498 // in a component list should be marked as MEMBER_OF; all subsequent entries
7499 // do not belong to the base struct. E.g.
7500 // struct S2 s;
7501 // s.ps->ps->ps->f[:]
7502 // (1) (2) (3) (4)
7503 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7504 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7505 // is the pointee of ps(2), which is not a member of struct s, so it
7506 // should not be marked as such (it is still PTR_AND_OBJ).
7507 // The variable is initialized to false so that PTR_AND_OBJ entries which
7508 // are not struct members are not considered (e.g. array of pointers to
7509 // data).
7510 bool ShouldBeMemberOf = false;
7511
7512 // Variable keeping track of whether or not we have encountered a component
7513 // in the component list which is a member expression. Useful when we have a
7514 // pointer or a final array section, in which case it is the previous
7515 // component in the list which tells us whether we have a member expression.
7516 // E.g. X.f[:]
7517 // While processing the final array section "[:]" it is "f" which tells us
7518 // whether we are dealing with a member of a declared struct.
7519 const MemberExpr *EncounteredME = nullptr;
7520
7521 // Track the total number of dimensions. Start from one for the dummy
7522 // dimension.
7523 uint64_t DimSize = 1;
7524
7525 // Detects non-contiguous updates due to strided accesses.
7526 // Sets the 'IsNonContiguous' flag so that the 'MapType' bits are set
7527 // correctly when generating information to be passed to the runtime. The
7528 // flag is set to true if any array section has a stride not equal to 1, or
7529 // if the stride is not a constant expression (conservatively assumed
7530 // non-contiguous).
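// For example, given 'int arr[20]':
// 'target update to(arr[0:10:2])' has stride 2 and is non-contiguous,
// while 'arr[0:10]' (implicit stride 1) remains contiguous.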
7531 bool IsNonContiguous =
7532 CombinedInfo.NonContigInfo.IsNonContiguous ||
7533 any_of(Components, [&](const auto &Component) {
7534 const auto *OASE =
7535 dyn_cast<ArraySectionExpr>(Component.getAssociatedExpression());
7536 if (!OASE)
7537 return false;
7538
7539 const Expr *StrideExpr = OASE->getStride();
7540 if (!StrideExpr)
7541 return false;
7542
7543 const auto Constant =
7544 StrideExpr->getIntegerConstantExpr(CGF.getContext());
7545 if (!Constant)
7546 return true;
7547
7548 return !Constant->isOne();
7549 });
7550
7551 bool IsPrevMemberReference = false;
7552
7553 bool IsPartialMapped =
7554 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7555
7556 // We need to check if we will be encountering any member expressions
7557 // (MEs). If we do not encounter any ME, it means we will be mapping the
7558 // whole struct. In that case we need to skip adding an entry for the
7559 // struct to the CombinedInfo list and instead add an entry to the
7560 // StructBaseCombinedInfo list only when generating all info for clauses.
7561 bool IsMappingWholeStruct = true;
7562 if (!GenerateAllInfoForClauses) {
7563 IsMappingWholeStruct = false;
7564 } else {
7565 for (auto TempI = I; TempI != CE; ++TempI) {
7566 const MemberExpr *PossibleME =
7567 dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
7568 if (PossibleME) {
7569 IsMappingWholeStruct = false;
7570 break;
7571 }
7572 }
7573 }
7574
7575 for (; I != CE; ++I) {
7576 // If the current component is member of a struct (parent struct) mark it.
7577 if (!EncounteredME) {
7578 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7579 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7580 // as MEMBER_OF the parent struct.
7581 if (EncounteredME) {
7582 ShouldBeMemberOf = true;
7583 // Do not emit as a complex pointer if this is actually not an
7584 // array-like expression.
7585 if (FirstPointerInComplexData) {
7586 QualType Ty = std::prev(I)
7587 ->getAssociatedDeclaration()
7588 ->getType()
7589 .getNonReferenceType();
7590 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7591 FirstPointerInComplexData = false;
7592 }
7593 }
7594 }
7595
7596 auto Next = std::next(I);
7597
7598 // We need to generate the addresses and sizes if this is the last
7599 // component, if the component is a pointer, or if it is an array section
7600 // whose length can't be proven to be one. If this is a pointer, it
7601 // becomes the base address for the following components.
7602 
7603 // A final array section is one whose length can't be proven to be one.
7604 // If the map item is non-contiguous then we don't treat any array
7605 // section as a final array section.
7606 bool IsFinalArraySection =
7607 !IsNonContiguous &&
7608 isFinalArraySectionExpression(I->getAssociatedExpression());
7609
7610 // If we have a declaration for the mapping use that, otherwise use
7611 // the base declaration of the map clause.
7612 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7613 ? I->getAssociatedDeclaration()
7614 : BaseDecl;
7615 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7616 : MapExpr;
7617
7618 // Get information on whether the element is a pointer. Have to do a
7619 // special treatment for array sections given that they are built-in
7620 // types.
7621 const auto *OASE =
7622 dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
7623 const auto *OAShE =
7624 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7625 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7626 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7627 bool IsPointer =
7628 OAShE ||
7629 (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
7630 .getCanonicalType()
7631 ->isAnyPointerType()) ||
7632 I->getAssociatedExpression()->getType()->isAnyPointerType();
7633 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7634 MapDecl &&
7635 MapDecl->getType()->isLValueReferenceType();
7636 bool IsNonDerefPointer = IsPointer &&
7637 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7638 !IsNonContiguous;
7639
7640 if (OASE)
7641 ++DimSize;
7642
7643 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7644 IsFinalArraySection) {
7645 // If this is not the last component, we expect the pointer to be
7646 // associated with an array expression or member expression.
7647 assert((Next == CE ||
7648 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7649 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7650 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7651 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7652 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7653 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7654 "Unexpected expression");
7655
7656 Address LB = Address::invalid();
7657 Address LowestElem = Address::invalid();
7658 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7659 const MemberExpr *E) {
7660 const Expr *BaseExpr = E->getBase();
7661 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7662 // scalar.
7663 LValue BaseLV;
7664 if (E->isArrow()) {
7665 LValueBaseInfo BaseInfo;
7666 TBAAAccessInfo TBAAInfo;
7667 Address Addr =
7668 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7669 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7670 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7671 } else {
7672 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7673 }
7674 return BaseLV;
7675 };
7676 if (OAShE) {
7677 LowestElem = LB =
7678 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7679 CGF.ConvertTypeForMem(
7680 OAShE->getBase()->getType()->getPointeeType()),
7681 CGF.getContext().getTypeAlignInChars(
7682 OAShE->getBase()->getType()));
7683 } else if (IsMemberReference) {
7684 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7685 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7686 LowestElem = CGF.EmitLValueForFieldInitialization(
7687 BaseLVal, cast<FieldDecl>(MapDecl))
7688 .getAddress();
7689 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7690 .getAddress();
7691 } else {
7692 LowestElem = LB =
7693 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7694 .getAddress();
7695 }
7696
7697 // If this component is a pointer inside the base struct then we don't
7698 // need to create any entry for it - it will be combined with the object
7699 // it is pointing to into a single PTR_AND_OBJ entry.
7700 bool IsMemberPointerOrAddr =
7701 EncounteredME &&
7702 (((IsPointer || ForDeviceAddr) &&
7703 I->getAssociatedExpression() == EncounteredME) ||
7704 (IsPrevMemberReference && !IsPointer) ||
7705 (IsMemberReference && Next != CE &&
7706 !Next->getAssociatedExpression()->getType()->isPointerType()));
7707 if (!OverlappedElements.empty() && Next == CE) {
7708 // Handle base element with the info for overlapped elements.
7709 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7710 assert(!IsPointer &&
7711 "Unexpected base element with the pointer type.");
7712 // Mark the whole struct as the struct that requires allocation on the
7713 // device.
7714 PartialStruct.LowestElem = {0, LowestElem};
7715 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7716 I->getAssociatedExpression()->getType());
7717 Address HB = CGF.Builder.CreateConstGEP(
7718 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7719 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7720 TypeSize.getQuantity() - 1);
7721 PartialStruct.HighestElem = {
7722 std::numeric_limits<decltype(
7723 PartialStruct.HighestElem.first)>::max(),
7724 HB};
7725 PartialStruct.Base = BP;
7726 PartialStruct.LB = LB;
7727 assert(
7728 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7729 "Overlapped elements must be used only once for the variable.");
7730 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7731 // Emit data for non-overlapped data.
7732 OpenMPOffloadMappingFlags Flags =
7733 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7734 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7735 /*AddPtrFlag=*/false,
7736 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7737 CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
7738 MapExpr, BP, LB, IsNonContiguous,
7739 DimSize);
7740 // Do bitcopy of all non-overlapped structure elements.
7741 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7742 Component : OverlappedElements) {
7743 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7744 Component) {
7745 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7746 if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
7747 CopyGaps.processField(MC, FD, EmitMemberExprBase);
7748 }
7749 }
7750 }
7751 }
7752 CopyGaps.copyUntilEnd(HB);
7753 break;
7754 }
7755 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7756 // Skip adding an entry in the CurInfo of this combined entry if the
7757 // whole struct is currently being mapped. The struct needs to be added
7758 // in the first position before any data internal to the struct is
7759 // mapped.
7760 // Also skip adding an entry if the pointers in
7761 // PartialStruct.PreliminaryMapData.BasePointers have already been mapped.
7762 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
7763 (Next == CE && MapType != OMPC_MAP_unknown)) {
7764 if (!IsMappingWholeStruct) {
7765 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7766 CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7767 CombinedInfo.DevicePtrDecls.push_back(nullptr);
7768 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7769 CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7770 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7771 Size, CGF.Int64Ty, /*isSigned=*/true));
7772 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7773 : 1);
7774 } else {
7775 StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7776 StructBaseCombinedInfo.BasePointers.push_back(
7777 BP.emitRawPointer(CGF));
7778 StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
7779 StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7780 StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7781 StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7782 Size, CGF.Int64Ty, /*isSigned=*/true));
7783 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7784 IsNonContiguous ? DimSize : 1);
7785 }
7786
7787 // If Mapper is valid, the last component inherits the mapper.
7788 bool HasMapper = Mapper && Next == CE;
7789 if (!IsMappingWholeStruct)
7790 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7791 else
7792 StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
7793 : nullptr);
7794
7795 // We need to add a pointer flag for each map that comes from the
7796 // same expression, except for the first one. We also need to signal
7797 // that this map is the first one relating to the current capture
7798 // (there is a set of entries for each capture).
7799 OpenMPOffloadMappingFlags Flags =
7800 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7801 !IsExpressionFirstInfo || RequiresReference ||
7802 FirstPointerInComplexData || IsMemberReference,
7803 AreBothBasePtrAndPteeMapped ||
7804 (IsCaptureFirstInfo && !RequiresReference),
7805 IsNonContiguous);
7806
7807 if (!IsExpressionFirstInfo || IsMemberReference) {
7808 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7809 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7810 if (IsPointer || (IsMemberReference && Next != CE))
7811 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7812 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7813 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7814 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7815 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
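// See the map(ps->ps->ps->ps) example in the comment block above: the
// intermediate PTR_AND_OBJ entries carry no TO/FROM bits; only the
// final pointee entry does.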
7816
7817 if (ShouldBeMemberOf) {
7818 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7819 // should be later updated with the correct value of MEMBER_OF.
7820 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7821 // From now on, all subsequent PTR_AND_OBJ entries should not be
7822 // marked as MEMBER_OF.
7823 ShouldBeMemberOf = false;
7824 }
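// (The MEMBER_OF=FFFF placeholder is later replaced with the real
// argument position by setCorrectMemberOfFlag in emitCombinedEntry.)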
7825 }
7826
7827 if (!IsMappingWholeStruct)
7828 CombinedInfo.Types.push_back(Flags);
7829 else
7830 StructBaseCombinedInfo.Types.push_back(Flags);
7831 }
7832
7833 // If we have encountered a member expression so far, keep track of the
7834 // mapped member. If the parent is "*this", then the value declaration
7835 // is nullptr.
7836 if (EncounteredME) {
7837 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7838 unsigned FieldIndex = FD->getFieldIndex();
7839
7840 // Update info about the lowest and highest elements for this struct
7841 if (!PartialStruct.Base.isValid()) {
7842 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7843 if (IsFinalArraySection && OASE) {
7844 Address HB =
7845 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7846 .getAddress();
7847 PartialStruct.HighestElem = {FieldIndex, HB};
7848 } else {
7849 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7850 }
7851 PartialStruct.Base = BP;
7852 PartialStruct.LB = BP;
7853 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7854 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7855 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7856 if (IsFinalArraySection && OASE) {
7857 Address HB =
7858 CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
7859 .getAddress();
7860 PartialStruct.HighestElem = {FieldIndex, HB};
7861 } else {
7862 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7863 }
7864 }
7865 }
7866
7867 // Need to emit combined struct for array sections.
7868 if (IsFinalArraySection || IsNonContiguous)
7869 PartialStruct.IsArraySection = true;
7870
7871 // If we have a final array section, we are done with this expression.
7872 if (IsFinalArraySection)
7873 break;
7874
7875 // The pointer becomes the base for the next element.
7876 if (Next != CE)
7877 BP = IsMemberReference ? LowestElem : LB;
7878 if (!IsPartialMapped)
7879 IsExpressionFirstInfo = false;
7880 IsCaptureFirstInfo = false;
7881 FirstPointerInComplexData = false;
7882 IsPrevMemberReference = IsMemberReference;
7883 } else if (FirstPointerInComplexData) {
7884 QualType Ty = Components.rbegin()
7885 ->getAssociatedDeclaration()
7886 ->getType()
7887 .getNonReferenceType();
7888 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7889 FirstPointerInComplexData = false;
7890 }
7891 }
7892 // If we ran over the whole component list without encountering a member
7893 // expression, allocate space for the whole record.
7894 if (!EncounteredME)
7895 PartialStruct.HasCompleteRecord = true;
7896
7897 if (!IsNonContiguous)
7898 return;
7899
7900 const ASTContext &Context = CGF.getContext();
7901
7902 // To support strides in array sections, we need to initialize the first
7903 // dimension size as 1, the first offset as 0, and the first count as 1.
7904 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7905 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7906 MapValuesArrayTy CurStrides;
7907 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7908 uint64_t ElementTypeSize;
7909
7910 // Collect size information for each dimension and get the element size as
7911 // the first stride. For example, for `int arr[10][10]`, the DimSizes
7912 // should be [10, 10] and the first stride is 4 bytes.
7913 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7914 Components) {
7915 const Expr *AssocExpr = Component.getAssociatedExpression();
7916 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7917
7918 if (!OASE)
7919 continue;
7920
7921 QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
7922 auto *CAT = Context.getAsConstantArrayType(Ty);
7923 auto *VAT = Context.getAsVariableArrayType(Ty);
7924
7925 // We need all the dimension sizes except for the last dimension.
7926 assert((VAT || CAT || &Component == &*Components.begin()) &&
7927 "Should be either ConstantArray or VariableArray if not the "
7928 "first Component");
7929
7930 // Get element size if CurStrides is empty.
7931 if (CurStrides.empty()) {
7932 const Type *ElementType = nullptr;
7933 if (CAT)
7934 ElementType = CAT->getElementType().getTypePtr();
7935 else if (VAT)
7936 ElementType = VAT->getElementType().getTypePtr();
7937 else
7938 assert(&Component == &*Components.begin() &&
7939 "Only expect pointer (non CAT or VAT) when this is the "
7940 "first Component");
7941 // If ElementType is null, then it means the base is a pointer
7942 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7943 // on the next iteration.
7944 if (ElementType) {
7945 // If the base is a pointer, we need to remove one level of
7946 // indirection.
7947 if (&Component != &*Components.begin())
7948 ElementType = ElementType->getPointeeOrArrayElementType();
7949 ElementTypeSize =
7950 Context.getTypeSizeInChars(ElementType).getQuantity();
7951 CurStrides.push_back(
7952 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7953 }
7954 }
7955 // Get the dimension value, except for the last dimension, since we
7956 // don't need it.
7957 if (DimSizes.size() < Components.size() - 1) {
7958 if (CAT)
7959 DimSizes.push_back(
7960 llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
7961 else if (VAT)
7962 DimSizes.push_back(CGF.Builder.CreateIntCast(
7963 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7964 /*IsSigned=*/false));
7965 }
7966 }
7967
7968 // Skip the dummy dimension since we already have its information.
7969 auto *DI = DimSizes.begin() + 1;
7970 // Running product of the dimension sizes.
7971 llvm::Value *DimProd =
7972 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
7973
7974 // Collect the non-contiguity info. Notice that offset, count, and stride
7975 // are only meaningful for an array section, so we insert a null for
7976 // anything other than an array section.
7977 // Also, the sizes of the offset, count, and stride lists are not the
7978 // same as those of pointers, base_pointers, sizes, or dims. Instead,
7979 // they equal the number of non-contiguous declarations in the target
7980 // update to/from clause.
7981 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7982 Components) {
7983 const Expr *AssocExpr = Component.getAssociatedExpression();
7984
7985 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7986 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7987 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7988 /*isSigned=*/false);
7989 CurOffsets.push_back(Offset);
7990 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7991 CurStrides.push_back(CurStrides.back());
7992 continue;
7993 }
7994
7995 const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
7996
7997 if (!OASE)
7998 continue;
7999
8000 // Offset
8001 const Expr *OffsetExpr = OASE->getLowerBound();
8002 llvm::Value *Offset = nullptr;
8003 if (!OffsetExpr) {
8004 // If offset is absent, then we just set it to zero.
8005 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8006 } else {
8007 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8008 CGF.Int64Ty,
8009 /*isSigned=*/false);
8010 }
8011 CurOffsets.push_back(Offset);
8012
8013 // Count
8014 const Expr *CountExpr = OASE->getLength();
8015 llvm::Value *Count = nullptr;
8016 if (!CountExpr) {
8017 // In Clang, once a higher dimension is an array section, all the lower
8018 // dimensions are constructed as array sections too. However, for a case
8019 // like arr[0:2][2], Clang constructs the inner dimension as an array
8020 // section even though, per the spec, it is not in array-section form.
8021 if (!OASE->getColonLocFirst().isValid() &&
8022 !OASE->getColonLocSecond().isValid()) {
8023 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8024 } else {
8025 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8026 // When the length is absent it defaults to ⌈(size −
8027 // lower-bound)/stride⌉, where size is the size of the array
8028 // dimension.
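// E.g., for 'arr[2::2]' over a dimension of size 10, the count below
// evaluates to (10 - 2) / 2 = 4, covering elements 2, 4, 6, and 8.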
8029 const Expr *StrideExpr = OASE->getStride();
8030 llvm::Value *Stride =
8031 StrideExpr
8032 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8033 CGF.Int64Ty, /*isSigned=*/false)
8034 : nullptr;
8035 if (Stride)
8036 Count = CGF.Builder.CreateUDiv(
8037 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8038 else
8039 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8040 }
8041 } else {
8042 Count = CGF.EmitScalarExpr(CountExpr);
8043 }
8044 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8045 CurCounts.push_back(Count);
8046
8047 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8048 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8049 // Offset Count Stride
8050 // D0 0 1 4 (int) <- dummy dimension
8051 // D1 0 2 8 (2 * (1) * 4)
8052 // D2 1 2 20 (1 * (1 * 5) * 4)
8053 // D3 0 2 200 (2 * (1 * 5 * 5) * 4)
8054 const Expr *StrideExpr = OASE->getStride();
8055 llvm::Value *Stride =
8056 StrideExpr
8057 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8058 CGF.Int64Ty, /*isSigned=*/false)
8059 : nullptr;
8060 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8061 if (Stride)
8062 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8063 else
8064 CurStrides.push_back(DimProd);
8065 if (DI != DimSizes.end())
8066 ++DI;
8067 }
8068
8069 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8070 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8071 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8072 }
8073
8074 /// Return the adjusted map modifiers if the declaration a capture refers to
8075 /// appears in a first-private clause. This is expected to be used only with
8076 /// directives that start with 'target'.
8077 OpenMPOffloadMappingFlags
8078 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8079 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8080
8081 // A first private variable captured by reference will use only the
8082 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8083 // declaration is known as first-private in this handler.
8084 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8085 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8086 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8087 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
8088 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
8089 OpenMPOffloadMappingFlags::OMP_MAP_TO;
8090 }
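// For instance, 'firstprivate(p)' with 'int *p' yields TO | PTR_AND_OBJ,
// while a non-pointer firstprivate capture yields PRIVATE | TO.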
8091 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8092 if (I != LambdasMap.end())
8093 // For map(to: lambda): use the user-specified map type.
8094 return getMapTypeBits(
8095 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8096 /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
8097 /*AddPtrFlag=*/false,
8098 /*AddIsTargetParamFlag=*/false,
8099 /*isNonContiguous=*/false);
8100 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
8101 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
8102 }
8103
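/// Flatten the fields of \a RD and of all its non-empty (virtual and
/// non-virtual) base classes into \a Layout, in LLVM record layout order.
/// For instance, for hypothetical types 'struct B { int x; };' and
/// 'struct D : B { int y; };', the plain layout of 'D' is [B::x, D::y].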
8104 void getPlainLayout(const CXXRecordDecl *RD,
8105 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8106 bool AsBase) const {
8107 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8108
8109 llvm::StructType *St =
8110 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8111
8112 unsigned NumElements = St->getNumElements();
8113 llvm::SmallVector<
8114 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8115 RecordLayout(NumElements);
8116
8117 // Fill bases.
8118 for (const auto &I : RD->bases()) {
8119 if (I.isVirtual())
8120 continue;
8121
8122 QualType BaseTy = I.getType();
8123 const auto *Base = BaseTy->getAsCXXRecordDecl();
8124 // Ignore empty bases.
8125 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
8126 CGF.getContext()
8127 .getASTRecordLayout(Base)
8128 .getNonVirtualSize()
8129 .isZero())
8130 continue;
8131
8132 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8133 RecordLayout[FieldIndex] = Base;
8134 }
8135 // Fill in virtual bases.
8136 for (const auto &I : RD->vbases()) {
8137 QualType BaseTy = I.getType();
8138 // Ignore empty bases.
8139 if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
8140 continue;
8141
8142 const auto *Base = BaseTy->getAsCXXRecordDecl();
8143 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8144 if (RecordLayout[FieldIndex])
8145 continue;
8146 RecordLayout[FieldIndex] = Base;
8147 }
8148 // Fill in all the fields.
8149 assert(!RD->isUnion() && "Unexpected union.");
8150 for (const auto *Field : RD->fields()) {
8151 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8152 // will fill in later.)
8153 if (!Field->isBitField() &&
8154 !isEmptyFieldForLayout(CGF.getContext(), Field)) {
8155 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8156 RecordLayout[FieldIndex] = Field;
8157 }
8158 }
8159 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8160 &Data : RecordLayout) {
8161 if (Data.isNull())
8162 continue;
8163 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
8164 getPlainLayout(Base, Layout, /*AsBase=*/true);
8165 else
8166 Layout.push_back(cast<const FieldDecl *>(Data));
8167 }
8168 }
8169
8170 /// Generate all the base pointers, section pointers, sizes, map types, and
8171 /// mappers for the extracted mappable expressions (all included in \a
8172 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8173 /// pair of the relevant declaration and index where it occurs is appended to
8174 /// the device pointers info array.
8175 void generateAllInfoForClauses(
8176 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8177 llvm::OpenMPIRBuilder &OMPBuilder,
8178 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8179 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8180 // We have to process the component lists that relate to the same
8181 // declaration in a single chunk so that we can generate the map flags
8182 // correctly. Therefore, we organize all lists in a map.
8183 enum MapKind { Present, Allocs, Other, Total };
8184 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8185 SmallVector<SmallVector<MapInfo, 8>, 4>>
8186 Info;
8187
8188 // Helper function to fill the information map for the different supported
8189 // clauses.
8190 auto &&InfoGen =
8191 [&Info, &SkipVarSet](
8192 const ValueDecl *D, MapKind Kind,
8193 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8194 OpenMPMapClauseKind MapType,
8195 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8196 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8197 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8198 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8199 if (SkipVarSet.contains(D))
8200 return;
8201 auto It = Info.try_emplace(D, Total).first;
8202 It->second[Kind].emplace_back(
8203 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8204 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8205 };
8206
8207 for (const auto *Cl : Clauses) {
8208 const auto *C = dyn_cast<OMPMapClause>(Cl);
8209 if (!C)
8210 continue;
8211 MapKind Kind = Other;
8212 if (llvm::is_contained(C->getMapTypeModifiers(),
8213 OMPC_MAP_MODIFIER_present))
8214 Kind = Present;
8215 else if (C->getMapType() == OMPC_MAP_alloc)
8216 Kind = Allocs;
8217 const auto *EI = C->getVarRefs().begin();
8218 for (const auto L : C->component_lists()) {
8219 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8220 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8221 C->getMapTypeModifiers(), {},
8222 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8223 E);
8224 ++EI;
8225 }
8226 }
8227 for (const auto *Cl : Clauses) {
8228 const auto *C = dyn_cast<OMPToClause>(Cl);
8229 if (!C)
8230 continue;
8231 MapKind Kind = Other;
8232 if (llvm::is_contained(C->getMotionModifiers(),
8233 OMPC_MOTION_MODIFIER_present))
8234 Kind = Present;
8235 const auto *EI = C->getVarRefs().begin();
8236 for (const auto L : C->component_lists()) {
8237 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
8238 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8239 C->isImplicit(), std::get<2>(L), *EI);
8240 ++EI;
8241 }
8242 }
8243 for (const auto *Cl : Clauses) {
8244 const auto *C = dyn_cast<OMPFromClause>(Cl);
8245 if (!C)
8246 continue;
8247 MapKind Kind = Other;
8248 if (llvm::is_contained(C->getMotionModifiers(),
8249 OMPC_MOTION_MODIFIER_present))
8250 Kind = Present;
8251 const auto *EI = C->getVarRefs().begin();
8252 for (const auto L : C->component_lists()) {
8253 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
8254 C->getMotionModifiers(),
8255 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8256 *EI);
8257 ++EI;
8258 }
8259 }
8260
8261 // Look at the use_device_ptr and use_device_addr clause information and
8262 // mark the existing map entries as such. If there is no map information
8263 // for an entry in the use_device_ptr or use_device_addr list, we create
8264 // one with map type 'alloc' and a zero size section. It is the user's
8265 // fault if that was not mapped before. If there is no map information and
8266 // the pointer is a struct member, then we defer the emission of that entry
8267 // until the whole struct has been processed.
8268 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8269 SmallVector<DeferredDevicePtrEntryTy, 4>>
8270 DeferredInfo;
8271 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8272
8273 auto &&UseDeviceDataCombinedInfoGen =
8274 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8275 CodeGenFunction &CGF, bool IsDevAddr) {
8276 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8277 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
8278 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
8279 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8280 IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8281 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8282 UseDeviceDataCombinedInfo.Sizes.push_back(
8283 llvm::Constant::getNullValue(CGF.Int64Ty));
8284 UseDeviceDataCombinedInfo.Types.push_back(
8285 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8286 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8287 };
8288
8289 auto &&MapInfoGen =
8290 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8291 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8292 OMPClauseMappableExprCommon::MappableExprComponentListRef
8293 Components,
8294 bool IsImplicit, bool IsDevAddr) {
8295 // We didn't find any match in our map information; generate a
8296 // zero-size array section. If the pointer is a struct member, we defer
8297 // this action until the whole struct has been processed.
8298 if (isa<MemberExpr>(IE)) {
8299 // Insert the pointer into Info to be processed by
8300 // generateInfoForComponentList. Because it is a member pointer
8301 // without a pointee, no entry will be generated for it, therefore
8302 // we need to generate one after the whole struct has been
8303 // processed. Nonetheless, generateInfoForComponentList must be
8304 // called to take the pointer into account for the calculation of
8305 // the range of the partial struct.
8306 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
8307 /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
8308 IsDevAddr);
8309 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8310 } else {
8311 llvm::Value *Ptr;
8312 if (IsDevAddr) {
8313 if (IE->isGLValue())
8314 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8315 else
8316 Ptr = CGF.EmitScalarExpr(IE);
8317 } else {
8318 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8319 }
8320 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
8321 }
8322 };
8323
8324 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8325 const Expr *IE, bool IsDevAddr) -> bool {
8326 // We potentially have map information for this declaration already.
8327 // Look for the first set of components that refer to it. If found,
8328 // return true.
8329 // If the first component is a member expression, we have to look into
8330 // 'this', which maps to null in the map of map information. Otherwise
8331 // look directly for the information.
8332 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8333 if (It != Info.end()) {
8334 bool Found = false;
8335 for (auto &Data : It->second) {
8336 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8337 return MI.Components.back().getAssociatedDeclaration() == VD;
8338 });
8339 // If we found a map entry, signal that the pointer has to be
8340 // returned and move on to the next declaration. Exclude cases where
8341 // the base pointer is mapped as array subscript, array section or
8342 // array shaping. The base address is passed as a pointer to base in
8343 // this case and cannot be used as a base for use_device_ptr list
8344 // item.
8345 if (CI != Data.end()) {
8346 if (IsDevAddr) {
8347 CI->ForDeviceAddr = IsDevAddr;
8348 CI->ReturnDevicePointer = true;
8349 Found = true;
8350 break;
8351 } else {
8352 auto PrevCI = std::next(CI->Components.rbegin());
8353 const auto *VarD = dyn_cast<VarDecl>(VD);
8354 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8355 isa<MemberExpr>(IE) ||
8356 !VD->getType().getNonReferenceType()->isPointerType() ||
8357 PrevCI == CI->Components.rend() ||
8358 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8359 VarD->hasLocalStorage()) {
8360 CI->ForDeviceAddr = IsDevAddr;
8361 CI->ReturnDevicePointer = true;
8362 Found = true;
8363 break;
8364 }
8365 }
8366 }
8367 }
8368 return Found;
8369 }
8370 return false;
8371 };
8372
8373 // Look at the use_device_ptr clause information and mark the existing
8374 // map entries as such. If there is no map information for an entry in the
8375 // use_device_ptr list, we create one with map type 'alloc' and a zero size
8376 // section. It is the user's fault if that was not mapped before. If there
8377 // is no map information and the pointer is a struct member, then we defer
8378 // the emission of that entry until the whole struct has been processed.
8379 for (const auto *Cl : Clauses) {
8380 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8381 if (!C)
8382 continue;
8383 for (const auto L : C->component_lists()) {
8384 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8385 std::get<1>(L);
8386 assert(!Components.empty() &&
8387 "Not expecting empty list of components!");
8388 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8389 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8390 const Expr *IE = Components.back().getAssociatedExpression();
8391 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8392 continue;
8393 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8394 /*IsDevAddr=*/false);
8395 }
8396 }
8397
8398 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8399 for (const auto *Cl : Clauses) {
8400 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8401 if (!C)
8402 continue;
8403 for (const auto L : C->component_lists()) {
8404 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8405 std::get<1>(L);
8406 assert(!std::get<1>(L).empty() &&
8407 "Not expecting empty list of components!");
8408 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8409 if (!Processed.insert(VD).second)
8410 continue;
8411 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8412 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8413 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8414 continue;
8415 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8416 /*IsDevAddr=*/true);
8417 }
8418 }
8419
8420 for (const auto &Data : Info) {
8421 StructRangeInfoTy PartialStruct;
8422 // Current struct information:
8423 MapCombinedInfoTy CurInfo;
8424 // Current struct base information:
8425 MapCombinedInfoTy StructBaseCurInfo;
8426 const Decl *D = Data.first;
8427 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8428 bool HasMapBasePtr = false;
8429 bool HasMapArraySec = false;
8430 if (VD && VD->getType()->isAnyPointerType()) {
8431 for (const auto &M : Data.second) {
8432 HasMapBasePtr = any_of(M, [](const MapInfo &L) {
8433 return isa_and_present<DeclRefExpr>(L.VarRef);
8434 });
8435 HasMapArraySec = any_of(M, [](const MapInfo &L) {
8436 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8437 L.VarRef);
8438 });
8439 if (HasMapBasePtr && HasMapArraySec)
8440 break;
8441 }
8442 }
8443 for (const auto &M : Data.second) {
8444 for (const MapInfo &L : M) {
8445 assert(!L.Components.empty() &&
8446 "Not expecting declaration with no component lists.");
8447
8448 // Remember the current base pointer index.
8449 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8450 unsigned StructBasePointersIdx =
8451 StructBaseCurInfo.BasePointers.size();
8452 CurInfo.NonContigInfo.IsNonContiguous =
8453 L.Components.back().isNonContiguous();
8454 generateInfoForComponentList(
8455 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8456 CurInfo, StructBaseCurInfo, PartialStruct,
8457 /*IsFirstComponentList=*/false, L.IsImplicit,
8458 /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
8459 L.VarRef, /*OverlappedElements*/ {},
8460 HasMapBasePtr && HasMapArraySec);
8461
8462 // If this entry relates to a device pointer, set the relevant
8463 // declaration and add the 'return pointer' flag.
8464 if (L.ReturnDevicePointer) {
8465 // Check whether a value was added to either CurInfo or
8466 // StructBaseCurInfo and error if no value was added to either of
8467 // them:
8468 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8469 StructBasePointersIdx <
8470 StructBaseCurInfo.BasePointers.size()) &&
8471 "Unexpected number of mapped base pointers.");
8472
8473 // Choose a base pointer index which is always valid:
8474 const ValueDecl *RelevantVD =
8475 L.Components.back().getAssociatedDeclaration();
8476 assert(RelevantVD &&
8477 "No relevant declaration related with device pointer??");
8478
8479 // If StructBaseCurInfo has been updated this iteration then work on
8480 // the first new entry added to it, i.e., make sure that when multiple
8481 // values are added to any of the lists, the first value added is the
8482 // one modified by the assignments below (not the last value
8483 // added).
8484 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8485 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8486 RelevantVD;
8487 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8488 L.ForDeviceAddr ? DeviceInfoTy::Address
8489 : DeviceInfoTy::Pointer;
8490 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8491 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8492 } else {
8493 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8494 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8495 L.ForDeviceAddr ? DeviceInfoTy::Address
8496 : DeviceInfoTy::Pointer;
8497 CurInfo.Types[CurrentBasePointersIdx] |=
8498 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8499 }
8500 }
8501 }
8502 }
8503
8504 // Append any pending zero-length pointers which are struct members and
8505 // are used with use_device_ptr or use_device_addr.
8506 auto CI = DeferredInfo.find(Data.first);
8507 if (CI != DeferredInfo.end()) {
8508 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8509 llvm::Value *BasePtr;
8510 llvm::Value *Ptr;
8511 if (L.ForDeviceAddr) {
8512 if (L.IE->isGLValue())
8513 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8514 else
8515 Ptr = this->CGF.EmitScalarExpr(L.IE);
8516 BasePtr = Ptr;
8517 // Entry is RETURN_PARAM. Also, set the placeholder value
8518 // MEMBER_OF=FFFF so that the entry is later updated with the
8519 // correct value of MEMBER_OF.
8520 CurInfo.Types.push_back(
8521 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8522 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8523 } else {
8524 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8525 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8526 L.IE->getExprLoc());
8527 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8528 // placeholder value MEMBER_OF=FFFF so that the entry is later
8529 // updated with the correct value of MEMBER_OF.
8530 CurInfo.Types.push_back(
8531 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8532 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8533 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8534 }
8535 CurInfo.Exprs.push_back(L.VD);
8536 CurInfo.BasePointers.emplace_back(BasePtr);
8537 CurInfo.DevicePtrDecls.emplace_back(L.VD);
8538 CurInfo.DevicePointers.emplace_back(
8539 L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8540 CurInfo.Pointers.push_back(Ptr);
8541 CurInfo.Sizes.push_back(
8542 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8543 CurInfo.Mappers.push_back(nullptr);
8544 }
8545 }
8546
8547 // Unify entries in one list making sure the struct mapping precedes the
8548 // individual fields:
8549 MapCombinedInfoTy UnionCurInfo;
8550 UnionCurInfo.append(StructBaseCurInfo);
8551 UnionCurInfo.append(CurInfo);
8552
8553 // If there is an entry in PartialStruct it means we have a struct with
8554 // individual members mapped. Emit an extra combined entry.
8555 if (PartialStruct.Base.isValid()) {
8556 UnionCurInfo.NonContigInfo.Dims.push_back(0);
8557 // Emit a combined entry:
8558 emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
8559 /*IsMapThis*/ !VD, OMPBuilder, VD);
8560 }
8561
8562 // We need to append the results of this capture to what we already have.
8563 CombinedInfo.append(UnionCurInfo);
8564 }
8565 // Append data for use_device_ptr clauses.
8566 CombinedInfo.append(UseDeviceDataCombinedInfo);
8567 }
8568
8569public:
8570 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8571 : CurDir(&Dir), CGF(CGF) {
8572 // Extract firstprivate clause information.
8573 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8574 for (const auto *D : C->varlist())
8575 FirstPrivateDecls.try_emplace(
8576 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8577 // Extract implicit firstprivates from uses_allocators clauses.
8578 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8579 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8580 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8581 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8582 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8583 /*Implicit=*/true);
8584 else if (const auto *VD = dyn_cast<VarDecl>(
8585 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8586 ->getDecl()))
8587 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8588 }
8589 }
8590 // Extract device pointer clause information.
8591 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8592 for (auto L : C->component_lists())
8593 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8594 // Extract device addr clause information.
8595 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8596 for (auto L : C->component_lists())
8597 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8598 // Extract map information.
8599 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8600 if (C->getMapType() != OMPC_MAP_to)
8601 continue;
8602 for (auto L : C->component_lists()) {
8603 const ValueDecl *VD = std::get<0>(L);
8604 const auto *RD = VD ? VD->getType()
8605 .getCanonicalType()
8606 .getNonReferenceType()
8607 ->getAsCXXRecordDecl()
8608 : nullptr;
8609 if (RD && RD->isLambda())
8610 LambdasMap.try_emplace(std::get<0>(L), C);
8611 }
8612 }
8613 }
8614
8615 /// Constructor for the declare mapper directive.
8616 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8617 : CurDir(&Dir), CGF(CGF) {}
8618
8619 /// Generate code for the combined entry if we have a partially mapped struct
8620 /// and take care of the mapping flags of the arguments corresponding to
8621 /// individual struct members.
8622 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8623 MapFlagsArrayTy &CurTypes,
8624 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8625 llvm::OpenMPIRBuilder &OMPBuilder,
8626 const ValueDecl *VD = nullptr,
8627 unsigned OffsetForMemberOfFlag = 0,
8628 bool NotTargetParams = true) const {
8629 if (CurTypes.size() == 1 &&
8630 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8631 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8632 !PartialStruct.IsArraySection)
8633 return;
8634 Address LBAddr = PartialStruct.LowestElem.second;
8635 Address HBAddr = PartialStruct.HighestElem.second;
8636 if (PartialStruct.HasCompleteRecord) {
8637 LBAddr = PartialStruct.LB;
8638 HBAddr = PartialStruct.LB;
8639 }
8640 CombinedInfo.Exprs.push_back(VD);
8641 // Base is the base of the struct
8642 CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8643 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8644 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8645 // Pointer is the address of the lowest element
8646 llvm::Value *LB = LBAddr.emitRawPointer(CGF);
8647 const CXXMethodDecl *MD =
8648 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8649 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8650 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8651 // There should not be a mapper for a combined entry.
8652 if (HasBaseClass) {
8653 // OpenMP 5.2 148:21:
8654 // If the target construct is within a class non-static member function,
8655 // and a variable is an accessible data member of the object for which the
8656 // non-static data member function is invoked, the variable is treated as
8657 // if the this[:1] expression had appeared in a map clause with a map-type
8658 // of tofrom.
8659 // Emit this[:1]
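// E.g., in a non-static member function of a class with a base class,
// the combined entry maps the complete object: base 'this', begin
// 'this', size sizeof(*this), treated as tofrom.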
8660 CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
8661 QualType Ty = MD->getFunctionObjectParameterType();
8662 llvm::Value *Size =
8663 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8664 /*isSigned=*/true);
8665 CombinedInfo.Sizes.push_back(Size);
8666 } else {
8667 CombinedInfo.Pointers.push_back(LB);
8668 // Size is (addr of {highest+1} element) - (addr of lowest element)
8669 llvm::Value *HB = HBAddr.emitRawPointer(CGF);
8670 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8671 HBAddr.getElementType(), HB, /*Idx0=*/1);
8672 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8673 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8674 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8675 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8676 /*isSigned=*/false);
8677 CombinedInfo.Sizes.push_back(Size);
8678 }
8679 CombinedInfo.Mappers.push_back(nullptr);
8680 // The map type is TARGET_PARAM when generating info for captures.
8681 CombinedInfo.Types.push_back(
8682 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8683 : !PartialStruct.PreliminaryMapData.BasePointers.empty()
8684 ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
8685 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8686 // If any element has the present modifier, then make sure the runtime
8687 // doesn't attempt to allocate the struct.
8688 if (CurTypes.end() !=
8689 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8690 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8691 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8692 }))
8693 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8694 // Remove TARGET_PARAM flag from the first element
8695 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8696 // If any element has the ompx_hold modifier, then make sure the runtime
8697 // uses the hold reference count for the struct as a whole so that it won't
8698 // be unmapped by an extra dynamic reference count decrement. Add it to all
8699 // elements as well so the runtime knows which reference count to check
8700 // when determining whether it's time for device-to-host transfers of
8701 // individual elements.
8702 if (CurTypes.end() !=
8703 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8704 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8705 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8706 })) {
8707 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8708 for (auto &M : CurTypes)
8709 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8710 }
8711
8712 // All other current entries will be MEMBER_OF the combined entry
8713 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8714 // 0xFFFF in the MEMBER_OF field).
8715 OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
8716 OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
8717 for (auto &M : CurTypes)
8718 OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
8719 }
8720
8721 /// Generate all the base pointers, section pointers, sizes, map types, and
8722 /// mappers for the extracted mappable expressions (all included in \a
8723 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8724 /// pair of the relevant declaration and index where it occurs is appended to
8725 /// the device pointers info array.
8726 void generateAllInfo(
8727 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8728 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8729 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8730 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8731 "Expect an executable directive");
8732 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8733 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8734 SkipVarSet);
8735 }
8736
8737 /// Generate all the base pointers, section pointers, sizes, map types, and
8739 /// mappers for the extracted map clauses of a user-defined mapper (all
8740 /// included in \a CombinedInfo).
8740 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8741 llvm::OpenMPIRBuilder &OMPBuilder) const {
8742 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
8743 "Expect a declare mapper directive");
8744 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
8745 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
8746 OMPBuilder);
8747 }
8748
8749 /// Emit capture info for lambdas for variables captured by reference.
8750 void generateInfoForLambdaCaptures(
8751 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8752 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8753 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8754 const auto *RD = VDType->getAsCXXRecordDecl();
8755 if (!RD || !RD->isLambda())
8756 return;
8757 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8758 CGF.getContext().getDeclAlign(VD));
8759 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8760 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8761 FieldDecl *ThisCapture = nullptr;
8762 RD->getCaptureFields(Captures, ThisCapture);
8763 if (ThisCapture) {
8764 LValue ThisLVal =
8765 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8766 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8767 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8768 VDLVal.getPointer(CGF));
8769 CombinedInfo.Exprs.push_back(VD);
8770 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8771 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8772 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8773 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8774 CombinedInfo.Sizes.push_back(
8775 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8776 CGF.Int64Ty, /*isSigned=*/true));
8777 CombinedInfo.Types.push_back(
8778 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8779 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8780 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8781 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8782 CombinedInfo.Mappers.push_back(nullptr);
8783 }
8784 for (const LambdaCapture &LC : RD->captures()) {
8785 if (!LC.capturesVariable())
8786 continue;
8787 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8788 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8789 continue;
8790 auto It = Captures.find(VD);
8791 assert(It != Captures.end() && "Found lambda capture without field.");
8792 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8793 if (LC.getCaptureKind() == LCK_ByRef) {
8794 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8795 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8796 VDLVal.getPointer(CGF));
8797 CombinedInfo.Exprs.push_back(VD);
8798 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8799 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8800 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8801 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8802 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8803 CGF.getTypeSize(
8804 VD->getType().getCanonicalType().getNonReferenceType()),
8805 CGF.Int64Ty, /*isSigned=*/true));
8806 } else {
8807 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8808 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8809 VDLVal.getPointer(CGF));
8810 CombinedInfo.Exprs.push_back(VD);
8811 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8812 CombinedInfo.DevicePtrDecls.push_back(nullptr);
8813 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
8814 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8815 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8816 }
8817 CombinedInfo.Types.push_back(
8818 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8819 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8820 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8821 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8822 CombinedInfo.Mappers.push_back(nullptr);
8823 }
8824 }
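// The situation handled above, by example (illustrative only): a lambda
// mapped to the device whose by-reference capture must be attached as a
// PTR_AND_OBJ member so the device copy points at the device instance:
// \code
//   int X = 0;
//   auto L = [&X]() { X += 1; };
//   #pragma omp target map(to : L)
//   L();
// \endcode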
8825
8826 /// Set correct indices for lambda captures.
8827 void adjustMemberOfForLambdaCaptures(
8828 llvm::OpenMPIRBuilder &OMPBuilder,
8829 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8830 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8831 MapFlagsArrayTy &Types) const {
8832 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8833 // Set correct member_of idx for all implicit lambda captures.
8834 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8835 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8836 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8837 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8838 continue;
8839 llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
8840 assert(BasePtr && "Unable to find base lambda address.");
8841 int TgtIdx = -1;
8842 for (unsigned J = I; J > 0; --J) {
8843 unsigned Idx = J - 1;
8844 if (Pointers[Idx] != BasePtr)
8845 continue;
8846 TgtIdx = Idx;
8847 break;
8848 }
8849 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8850 // All other current entries will be MEMBER_OF the combined entry
8851 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8852 // 0xFFFF in the MEMBER_OF field).
8853 OpenMPOffloadMappingFlags MemberOfFlag =
8854 OMPBuilder.getMemberOfFlag(TgtIdx);
8855 OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8856 }
8857 }
8858
8859 /// For a capture that has an associated clause, generate the base pointers,
8860 /// section pointers, sizes, map types, and mappers (all included in
8861 /// \a CurCaptureVarInfo).
8862 void generateInfoForCaptureFromClauseInfo(
8863 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
8864 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8865 unsigned OffsetForMemberOfFlag) const {
8866 assert(!Cap->capturesVariableArrayType() &&
8867 "Not expecting to generate map info for a variable array type!");
8868
8869 // We need to know when we are generating information for the first component.
8870 const ValueDecl *VD = Cap->capturesThis()
8871 ? nullptr
8872 : Cap->getCapturedVar()->getCanonicalDecl();
8873
8874 // For map(to: lambda): skip it here; it is processed in
8875 // generateDefaultMapInfo.
8876 if (LambdasMap.count(VD))
8877 return;
8878
8879 // If this declaration appears in an is_device_ptr clause we just have to
8880 // pass the pointer by value. If it is a reference to a declaration, we just
8881 // pass its value.
8882 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8883 CurCaptureVarInfo.Exprs.push_back(VD);
8884 CurCaptureVarInfo.BasePointers.emplace_back(Arg);
8885 CurCaptureVarInfo.DevicePtrDecls.emplace_back(VD);
8886 CurCaptureVarInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
8887 CurCaptureVarInfo.Pointers.push_back(Arg);
8888 CurCaptureVarInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8889 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8890 /*isSigned=*/true));
8891 CurCaptureVarInfo.Types.push_back(
8892 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8893 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8894 CurCaptureVarInfo.Mappers.push_back(nullptr);
8895 return;
8896 }
8897
8898 MapDataArrayTy DeclComponentLists;
8899 // For member fields listed in is_device_ptr, store them in
8900 // DeclComponentLists for generating components info.
8901 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8902 auto It = DevPointersMap.find(VD);
8903 if (It != DevPointersMap.end())
8904 for (const auto &MCL : It->second)
8905 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8906 /*IsImplicit=*/ true, nullptr,
8907 nullptr);
8908 auto I = HasDevAddrsMap.find(VD);
8909 if (I != HasDevAddrsMap.end())
8910 for (const auto &MCL : I->second)
8911 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8912 /*IsImplicit=*/ true, nullptr,
8913 nullptr);
8914 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8915 "Expect a executable directive");
8916 const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
8917 bool HasMapBasePtr = false;
8918 bool HasMapArraySec = false;
8919 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8920 const auto *EI = C->getVarRefs().begin();
8921 for (const auto L : C->decl_component_lists(VD)) {
8922 const ValueDecl *VDecl, *Mapper;
8923 // The expression is not valid if the mapping is implicit.
8924 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8925 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8926 std::tie(VDecl, Components, Mapper) = L;
8927 assert(VDecl == VD && "We got information for the wrong declaration??");
8928 assert(!Components.empty() &&
8929 "Not expecting declaration with no component lists.");
8930 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
8931 HasMapBasePtr = true;
8932 if (VD && E && VD->getType()->isAnyPointerType() &&
8933 isa<ArraySectionExpr>(E))
8934 HasMapArraySec = true;
8935 DeclComponentLists.emplace_back(Components, C->getMapType(),
8936 C->getMapTypeModifiers(),
8937 C->isImplicit(), Mapper, E);
8938 ++EI;
8939 }
8940 }
8941 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8942 const MapData &RHS) {
8943 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8944 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8945 bool HasPresent =
8946 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8947 bool HasAllocs = MapType == OMPC_MAP_alloc;
8948 MapModifiers = std::get<2>(RHS);
8949 MapType = std::get<1>(LHS);
8950 bool HasPresentR =
8951 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8952 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8953 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8954 });
8955
8956 auto GenerateInfoForComponentLists =
8957 [&](ArrayRef<MapData> DeclComponentLists,
8958 bool IsEligibleForTargetParamFlag) {
8959 MapCombinedInfoTy CurInfoForComponentLists;
8960 StructRangeInfoTy PartialStruct;
8961
8962 if (DeclComponentLists.empty())
8963 return;
8964
8965 generateInfoForCaptureFromComponentLists(
8966 VD, DeclComponentLists, CurInfoForComponentLists, PartialStruct,
8967 IsEligibleForTargetParamFlag,
8968 /*AreBothBasePtrAndPteeMapped=*/HasMapBasePtr && HasMapArraySec);
8969
8970 // If there is an entry in PartialStruct it means we have a
8971 // struct with individual members mapped. Emit an extra combined
8972 // entry.
8973 if (PartialStruct.Base.isValid()) {
8974 CurCaptureVarInfo.append(PartialStruct.PreliminaryMapData);
8975 emitCombinedEntry(
8976 CurCaptureVarInfo, CurInfoForComponentLists.Types,
8977 PartialStruct, Cap->capturesThis(), OMPBuilder, nullptr,
8978 OffsetForMemberOfFlag,
8979 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
8980 }
8981
8982 // Return if we didn't add any entries.
8983 if (CurInfoForComponentLists.BasePointers.empty())
8984 return;
8985
8986 CurCaptureVarInfo.append(CurInfoForComponentLists);
8987 };
8988
8989 GenerateInfoForComponentLists(DeclComponentLists,
8990 /*IsEligibleForTargetParamFlag=*/true);
8991 }
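// The early is_device_ptr path above corresponds to code like this
// (illustrative example; omp_target_alloc returns a device pointer):
// \code
//   int *P = (int *)omp_target_alloc(N * sizeof(int), Dev);
//   #pragma omp target is_device_ptr(P)
//   P[0] = 42; // P is passed by value; no mapping is performed
// \endcode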
8992
8993 /// Generate the base pointers, section pointers, sizes, map types, and
8994 /// mappers associated with \a DeclComponentLists for a given capture
8995 /// \a VD (all included in \a CurComponentListInfo).
8996 void generateInfoForCaptureFromComponentLists(
8997 const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
8998 MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
8999 bool IsListEligibleForTargetParamFlag,
9000 bool AreBothBasePtrAndPteeMapped = false) const {
9001 // Find overlapping elements (including the offset from the base element).
9002 llvm::SmallDenseMap<
9003 const MapData *,
9004 llvm::SmallVector<
9005 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9006 4>
9007 OverlappedData;
9008 size_t Count = 0;
9009 for (const MapData &L : DeclComponentLists) {
9010 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9011 OpenMPMapClauseKind MapType;
9012 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9013 bool IsImplicit;
9014 const ValueDecl *Mapper;
9015 const Expr *VarRef;
9016 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9017 L;
9018 ++Count;
9019 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
9020 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9021 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9022 VarRef) = L1;
9023 auto CI = Components.rbegin();
9024 auto CE = Components.rend();
9025 auto SI = Components1.rbegin();
9026 auto SE = Components1.rend();
9027 for (; CI != CE && SI != SE; ++CI, ++SI) {
9028 if (CI->getAssociatedExpression()->getStmtClass() !=
9029 SI->getAssociatedExpression()->getStmtClass())
9030 break;
9031 // Are we dealing with different variables/fields?
9032 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9033 break;
9034 }
9035 // We found an overlap if, for at least one of the two lists, we reached
9036 // the head of the components list.
9037 if (CI == CE || SI == SE) {
9038 // Ignore it if it is the same component.
9039 if (CI == CE && SI == SE)
9040 continue;
9041 const auto It = (SI == SE) ? CI : SI;
9042 // If one component is a pointer and another one is a kind of
9043 // dereference of this pointer (array subscript, section, dereference,
9044 // etc.), it is not an overlap.
9045 // Likewise, if one component is a base and another component is a
9046 // dereferenced pointer MemberExpr with the same base.
9047 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9048 (std::prev(It)->getAssociatedDeclaration() &&
9049 std::prev(It)
9050 ->getAssociatedDeclaration()
9051 ->getType()
9052 ->isPointerType()) ||
9053 (It->getAssociatedDeclaration() &&
9054 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9055 std::next(It) != CE && std::next(It) != SE))
9056 continue;
9057 const MapData &BaseData = CI == CE ? L : L1;
9058 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9059 SI == SE ? Components : Components1;
9060 OverlappedData[&BaseData].push_back(SubData);
9061 }
9062 }
9063 }
9064 // Sort the overlapped elements for each item.
9065 llvm::SmallVector<const FieldDecl *, 4> Layout;
9066 if (!OverlappedData.empty()) {
9067 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9068 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9069 while (BaseType != OrigType) {
9070 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9071 OrigType = BaseType->getPointeeOrArrayElementType();
9072 }
9073
9074 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9075 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9076 else {
9077 const auto *RD = BaseType->getAsRecordDecl();
9078 Layout.append(RD->field_begin(), RD->field_end());
9079 }
9080 }
9081 for (auto &Pair : OverlappedData) {
9082 llvm::stable_sort(
9083 Pair.getSecond(),
9084 [&Layout](
9085 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9086 OMPClauseMappableExprCommon::MappableExprComponentListRef
9087 Second) {
9088 auto CI = First.rbegin();
9089 auto CE = First.rend();
9090 auto SI = Second.rbegin();
9091 auto SE = Second.rend();
9092 for (; CI != CE && SI != SE; ++CI, ++SI) {
9093 if (CI->getAssociatedExpression()->getStmtClass() !=
9094 SI->getAssociatedExpression()->getStmtClass())
9095 break;
9096 // Are we dealing with different variables/fields?
9097 if (CI->getAssociatedDeclaration() !=
9098 SI->getAssociatedDeclaration())
9099 break;
9100 }
9101
9102 // Lists contain the same elements.
9103 if (CI == CE && SI == SE)
9104 return false;
9105
9106 // A list with fewer elements is less than a list with more elements.
9107 if (CI == CE || SI == SE)
9108 return CI == CE;
9109
9110 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9111 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9112 if (FD1->getParent() == FD2->getParent())
9113 return FD1->getFieldIndex() < FD2->getFieldIndex();
9114 const auto *It =
9115 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9116 return FD == FD1 || FD == FD2;
9117 });
9118 return *It == FD1;
9119 });
9120 }
9121
9122 // Associated with a capture, because the mapping flags depend on it.
9123 // Go through all of the elements with the overlapped elements.
9124 bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
9125 MapCombinedInfoTy StructBaseCombinedInfo;
9126 for (const auto &Pair : OverlappedData) {
9127 const MapData &L = *Pair.getFirst();
9128 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9129 OpenMPMapClauseKind MapType;
9130 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9131 bool IsImplicit;
9132 const ValueDecl *Mapper;
9133 const Expr *VarRef;
9134 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9135 L;
9136 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9137 OverlappedComponents = Pair.getSecond();
9138 generateInfoForComponentList(
9139 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9140 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag, IsImplicit,
9141 /*GenerateAllInfoForClauses*/ false, Mapper,
9142 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9143 AddTargetParamFlag = false;
9144 }
9145 // Go through other elements without overlapped elements.
9146 for (const MapData &L : DeclComponentLists) {
9147 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9148 OpenMPMapClauseKind MapType;
9149 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9150 bool IsImplicit;
9151 const ValueDecl *Mapper;
9152 const Expr *VarRef;
9153 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9154 L;
9155 auto It = OverlappedData.find(&L);
9156 if (It == OverlappedData.end())
9157 generateInfoForComponentList(
9158 MapType, MapModifiers, {}, Components, CurComponentListInfo,
9159 StructBaseCombinedInfo, PartialStruct, AddTargetParamFlag,
9160 IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
9161 /*ForDeviceAddr=*/false, VD, VarRef,
9162 /*OverlappedElements*/ {}, AreBothBasePtrAndPteeMapped);
9163 AddTargetParamFlag = false;
9164 }
9165 }
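// The overlap analysis above targets cases such as (illustrative only):
// \code
//   struct S { int A; double B; } V;
//   #pragma omp target map(tofrom : V) map(to : V.A)
//   { /* ... */ }
// \endcode
// Here map(to : V.A) overlaps map(tofrom : V), so V is mapped around the
// overlapped member instead of as a single contiguous block.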
9166
9167 /// Generate the default map information for a given capture \a CI,
9168 /// record field declaration \a RI and captured value \a CV.
9169 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9170 const FieldDecl &RI, llvm::Value *CV,
9171 MapCombinedInfoTy &CombinedInfo) const {
9172 bool IsImplicit = true;
9173 // Do the default mapping.
9174 if (CI.capturesThis()) {
9175 CombinedInfo.Exprs.push_back(nullptr);
9176 CombinedInfo.BasePointers.push_back(CV);
9177 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9178 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9179 CombinedInfo.Pointers.push_back(CV);
9180 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9181 CombinedInfo.Sizes.push_back(
9182 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9183 CGF.Int64Ty, /*isSigned=*/true));
9184 // Default map type.
9185 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
9186 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
9187 } else if (CI.capturesVariableByCopy()) {
9188 const VarDecl *VD = CI.getCapturedVar();
9189 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9190 CombinedInfo.BasePointers.push_back(CV);
9191 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9192 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9193 CombinedInfo.Pointers.push_back(CV);
9194 if (!RI.getType()->isAnyPointerType()) {
9195 // We have to signal to the runtime which captures passed by value are
9196 // not pointers.
9197 CombinedInfo.Types.push_back(
9198 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
9199 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9200 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9201 } else {
9202 // Pointers are implicitly mapped with a zero size and no flags
9203 // (other than the first map, which is added for all implicit maps).
9204 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
9205 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9206 }
9207 auto I = FirstPrivateDecls.find(VD);
9208 if (I != FirstPrivateDecls.end())
9209 IsImplicit = I->getSecond();
9210 } else {
9211 assert(CI.capturesVariable() && "Expected captured reference.");
9212 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9213 QualType ElementType = PtrTy->getPointeeType();
9214 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9215 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9216 // The default map type for a scalar/complex type is 'to' because by
9217 // default the value doesn't have to be retrieved. For an aggregate
9218 // type, the default is 'tofrom'.
9219 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9220 const VarDecl *VD = CI.getCapturedVar();
9221 auto I = FirstPrivateDecls.find(VD);
9222 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9223 CombinedInfo.BasePointers.push_back(CV);
9224 CombinedInfo.DevicePtrDecls.push_back(nullptr);
9225 CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
9226 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9227 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9228 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9229 AlignmentSource::Decl));
9230 CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
9231 } else {
9232 CombinedInfo.Pointers.push_back(CV);
9233 }
9234 if (I != FirstPrivateDecls.end())
9235 IsImplicit = I->getSecond();
9236 }
9237 // Every default map produces a single argument which is a target parameter.
9238 CombinedInfo.Types.back() |=
9239 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
9240
9241 // Add flag stating this is an implicit map.
9242 if (IsImplicit)
9243 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
9244
9245 // No user-defined mapper for default mapping.
9246 CombinedInfo.Mappers.push_back(nullptr);
9247 }
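// Default mapping by example (a sketch; the exact flags follow the logic
// above): with no explicit map clauses, a scalar captured by copy becomes
// a LITERAL target parameter, while an array defaults to tofrom:
// \code
//   int N = 10;        // LITERAL | TARGET_PARAM | IMPLICIT
//   double Buf[8];     // TO | FROM | TARGET_PARAM | IMPLICIT
//   #pragma omp target
//   Buf[0] = N;
// \endcode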
9248};
9249} // anonymous namespace
9250
9251// Try to extract the base declaration from a `this->x` expression if possible.
9252 static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9253 if (!E)
9254 return nullptr;
9255
9256 if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
9257 if (const MemberExpr *ME =
9258 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9259 return ME->getMemberDecl();
9260 return nullptr;
9261}
9262
9263/// Emit a string constant containing the names of the values mapped to the
9264/// offloading runtime library.
9265static llvm::Constant *
9266emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9267 MappableExprsHandler::MappingExprInfo &MapExprs) {
9268
9269 uint32_t SrcLocStrSize;
9270 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9271 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9272
9273 SourceLocation Loc;
9274 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9275 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9276 Loc = VD->getLocation();
9277 else
9278 Loc = MapExprs.getMapExpr()->getExprLoc();
9279 } else {
9280 Loc = MapExprs.getMapDecl()->getLocation();
9281 }
9282
9283 std::string ExprName;
9284 if (MapExprs.getMapExpr()) {
9285 PrintingPolicy P(CGF.getContext().getLangOpts());
9286 llvm::raw_string_ostream OS(ExprName);
9287 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9288 } else {
9289 ExprName = MapExprs.getMapDecl()->getNameAsString();
9290 }
9291
9292 std::string FileName;
9293 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9294 if (auto *DbgInfo = CGF.getDebugInfo())
9295 FileName = DbgInfo->remapDIPath(PLoc.getFilename());
9296 else
9297 FileName = PLoc.getFilename();
9298 return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
9299 PLoc.getColumn(), SrcLocStrSize);
9300}
9301/// Emit the arrays used to pass the captures and map information to the
9302/// offloading runtime library. If there is no map or capture information,
9303/// return nullptr by reference.
9304 static void emitOffloadingArraysAndArgs(
9305 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9306 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9307 bool IsNonContiguous = false, bool ForEndCall = false) {
9308 CodeGenModule &CGM = CGF.CGM;
9309
9310 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
9311 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
9312 CGF.AllocaInsertPt->getIterator());
9313 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
9314 CGF.Builder.GetInsertPoint());
9315
9316 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
9317 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
9318 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
9319 }
9320 };
9321
9322 auto CustomMapperCB = [&](unsigned int I) {
9323 llvm::Function *MFunc = nullptr;
9324 if (CombinedInfo.Mappers[I]) {
9325 Info.HasMapper = true;
9326 MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9327 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9328 }
9329 return MFunc;
9330 };
9331 cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
9332 AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
9333 IsNonContiguous, ForEndCall, DeviceAddrCB));
9334}
9335
9336/// Check for inner distribute directive.
9337static const OMPExecutableDirective *
9338 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9339 const auto *CS = D.getInnermostCapturedStmt();
9340 const auto *Body =
9341 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9342 const Stmt *ChildStmt =
9343 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9344
9345 if (const auto *NestedDir =
9346 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9347 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9348 switch (D.getDirectiveKind()) {
9349 case OMPD_target:
9350 // For now, treat 'target' with nested 'teams loop' as if it's
9351 // distributed (target teams distribute).
9352 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
9353 return NestedDir;
9354 if (DKind == OMPD_teams) {
9355 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9356 /*IgnoreCaptured=*/true);
9357 if (!Body)
9358 return nullptr;
9359 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9360 if (const auto *NND =
9361 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9362 DKind = NND->getDirectiveKind();
9363 if (isOpenMPDistributeDirective(DKind))
9364 return NND;
9365 }
9366 }
9367 return nullptr;
9368 case OMPD_target_teams:
9369 if (isOpenMPDistributeDirective(DKind))
9370 return NestedDir;
9371 return nullptr;
9372 case OMPD_target_parallel:
9373 case OMPD_target_simd:
9374 case OMPD_target_parallel_for:
9375 case OMPD_target_parallel_for_simd:
9376 return nullptr;
9377 case OMPD_target_teams_distribute:
9378 case OMPD_target_teams_distribute_simd:
9379 case OMPD_target_teams_distribute_parallel_for:
9380 case OMPD_target_teams_distribute_parallel_for_simd:
9381 case OMPD_parallel:
9382 case OMPD_for:
9383 case OMPD_parallel_for:
9384 case OMPD_parallel_master:
9385 case OMPD_parallel_sections:
9386 case OMPD_for_simd:
9387 case OMPD_parallel_for_simd:
9388 case OMPD_cancel:
9389 case OMPD_cancellation_point:
9390 case OMPD_ordered:
9391 case OMPD_threadprivate:
9392 case OMPD_allocate:
9393 case OMPD_task:
9394 case OMPD_simd:
9395 case OMPD_tile:
9396 case OMPD_unroll:
9397 case OMPD_sections:
9398 case OMPD_section:
9399 case OMPD_single:
9400 case OMPD_master:
9401 case OMPD_critical:
9402 case OMPD_taskyield:
9403 case OMPD_barrier:
9404 case OMPD_taskwait:
9405 case OMPD_taskgroup:
9406 case OMPD_atomic:
9407 case OMPD_flush:
9408 case OMPD_depobj:
9409 case OMPD_scan:
9410 case OMPD_teams:
9411 case OMPD_target_data:
9412 case OMPD_target_exit_data:
9413 case OMPD_target_enter_data:
9414 case OMPD_distribute:
9415 case OMPD_distribute_simd:
9416 case OMPD_distribute_parallel_for:
9417 case OMPD_distribute_parallel_for_simd:
9418 case OMPD_teams_distribute:
9419 case OMPD_teams_distribute_simd:
9420 case OMPD_teams_distribute_parallel_for:
9421 case OMPD_teams_distribute_parallel_for_simd:
9422 case OMPD_target_update:
9423 case OMPD_declare_simd:
9424 case OMPD_declare_variant:
9425 case OMPD_begin_declare_variant:
9426 case OMPD_end_declare_variant:
9427 case OMPD_declare_target:
9428 case OMPD_end_declare_target:
9429 case OMPD_declare_reduction:
9430 case OMPD_declare_mapper:
9431 case OMPD_taskloop:
9432 case OMPD_taskloop_simd:
9433 case OMPD_master_taskloop:
9434 case OMPD_master_taskloop_simd:
9435 case OMPD_parallel_master_taskloop:
9436 case OMPD_parallel_master_taskloop_simd:
9437 case OMPD_requires:
9438 case OMPD_metadirective:
9439 case OMPD_unknown:
9440 default:
9441 llvm_unreachable("Unexpected directive.");
9442 }
9443 }
9444
9445 return nullptr;
9446}
9447
9448/// Emit the user-defined mapper function. The code generation follows the
9449/// pattern in the example below.
9450/// \code
9451/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9452/// void *base, void *begin,
9453/// int64_t size, int64_t type,
9454/// void *name = nullptr) {
9455/// // Allocate space for an array section first or add a base/begin for
9456/// // pointer dereference.
9457/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9458/// !maptype.IsDelete)
9459/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9460/// size*sizeof(Ty), clearToFromMember(type));
9461/// // Map members.
9462/// for (unsigned i = 0; i < size; i++) {
9463/// // For each component specified by this mapper:
9464/// for (auto c : begin[i]->all_components) {
9465/// if (c.hasMapper())
9466/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9467/// c.arg_type, c.arg_name);
9468/// else
9469/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9470/// c.arg_begin, c.arg_size, c.arg_type,
9471/// c.arg_name);
9472/// }
9473/// }
9474/// // Delete the array section.
9475/// if (size > 1 && maptype.IsDelete)
9476/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9477/// size*sizeof(Ty), clearToFromMember(type));
9478/// }
9479/// \endcode
9480 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9481 CodeGenFunction *CGF) {
9482 if (UDMMap.count(D) > 0)
9483 return;
9484 ASTContext &C = CGM.getContext();
9485 QualType Ty = D->getType();
9486 auto *MapperVarDecl =
9487 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9488 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9489 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9490
9491 CodeGenFunction MapperCGF(CGM);
9492 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9493 auto PrivatizeAndGenMapInfoCB =
9494 [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
9495 llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
9496 MapperCGF.Builder.restoreIP(CodeGenIP);
9497
9498 // Privatize the declared variable of mapper to be the current array
9499 // element.
9500 Address PtrCurrent(
9501 PtrPHI, ElemTy,
9502 Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
9503 .getAlignment()
9504 .alignmentOfArrayElement(ElementSize));
9505 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9506 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9507 (void)Scope.Privatize();
9508
9509 // Get map clause information.
9510 MappableExprsHandler MEHandler(*D, MapperCGF);
9511 MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);
9512
9513 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9514 return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
9515 };
9516 if (CGM.getCodeGenOpts().getDebugInfo() !=
9517 llvm::codegenoptions::NoDebugInfo) {
9518 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9519 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9520 FillInfoMap);
9521 }
9522
9523 return CombinedInfo;
9524 };
9525
9526 auto CustomMapperCB = [&](unsigned I) {
9527 llvm::Function *MapperFunc = nullptr;
9528 if (CombinedInfo.Mappers[I]) {
9529 // Call the corresponding mapper function.
9530 MapperFunc = getOrCreateUserDefinedMapperFunc(
9531 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9532 assert(MapperFunc && "Expect a valid mapper function is available.");
9533 }
9534 return MapperFunc;
9535 };
9536
9537 SmallString<64> TyStr;
9538 llvm::raw_svector_ostream Out(TyStr);
9539 CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
9540 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9541
9542 llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
9543 PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
9544 UDMMap.try_emplace(D, NewFn);
9545 if (CGF)
9546 FunctionUDMMap[CGF->CurFn].push_back(D);
9547}
9548
9549 llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9550 const OMPDeclareMapperDecl *D) {
9551 auto I = UDMMap.find(D);
9552 if (I != UDMMap.end())
9553 return I->second;
9554 emitUserDefinedMapper(D);
9555 return UDMMap.lookup(D);
9556}
9557
9558 llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9559 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9560 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9561 const OMPLoopDirective &D)>
9562 SizeEmitter) {
9563 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9564 const OMPExecutableDirective *TD = &D;
9565 // Get nested teams distribute kind directive, if any. For now, treat
9566 // 'target_teams_loop' as if it's really a target_teams_distribute.
9567 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9568 Kind != OMPD_target_teams_loop)
9569 TD = getNestedDistributeDirective(CGM.getContext(), D);
9570 if (!TD)
9571 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9572
9573 const auto *LD = cast<OMPLoopDirective>(TD);
9574 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9575 return NumIterations;
9576 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9577}
9578
9579static void
9580emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9581 const OMPExecutableDirective &D,
9582 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9583 bool RequiresOuterTask, const CapturedStmt &CS,
9584 bool OffloadingMandatory, CodeGenFunction &CGF) {
9585 if (OffloadingMandatory) {
9586 CGF.Builder.CreateUnreachable();
9587 } else {
9588 if (RequiresOuterTask) {
9589 CapturedVars.clear();
9590 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9591 }
9592 OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
9593 CapturedVars);
9594 }
9595}
9596
9597static llvm::Value *emitDeviceID(
9598 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9599 CodeGenFunction &CGF) {
9600 // Emit device ID if any.
9601 llvm::Value *DeviceID;
9602 if (Device.getPointer()) {
9603 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9604 Device.getInt() == OMPC_DEVICE_device_num) &&
9605 "Expected device_num modifier.");
9606 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9607 DeviceID =
9608 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9609 } else {
9610 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9611 }
9612 return DeviceID;
9613}
9614
9615static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9616 CodeGenFunction &CGF) {
9617 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9618
9619 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9620 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9621 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9622 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9623 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9624 /*isSigned=*/false);
9625 }
9626 return DynCGroupMem;
9627}
9628 static void genMapInfoForCaptures(
9629 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9630 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9631 llvm::OpenMPIRBuilder &OMPBuilder,
9632 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
9633 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9634
9635 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9636 auto RI = CS.getCapturedRecordDecl()->field_begin();
9637 auto *CV = CapturedVars.begin();
9638 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9639 CE = CS.capture_end();
9640 CI != CE; ++CI, ++RI, ++CV) {
9641 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9642
9643 // VLA sizes are passed to the outlined region by copy and do not have map
9644 // information associated.
9645 if (CI->capturesVariableArrayType()) {
9646 CurInfo.Exprs.push_back(nullptr);
9647 CurInfo.BasePointers.push_back(*CV);
9648 CurInfo.DevicePtrDecls.push_back(nullptr);
9649 CurInfo.DevicePointers.push_back(
9650 MappableExprsHandler::DeviceInfoTy::None);
9651 CurInfo.Pointers.push_back(*CV);
9652 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9653 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
9654 // Copy to the device as an argument. No need to retrieve it.
9655 CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
9656 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
9657 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
9658 CurInfo.Mappers.push_back(nullptr);
9659 } else {
9660 // If we have any information in the map clause, we use it, otherwise we
9661 // just do a default mapping.
9662 MEHandler.generateInfoForCaptureFromClauseInfo(
9663 CI, *CV, CurInfo, OMPBuilder,
9664 /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());
9665
9666 if (!CI->capturesThis())
9667 MappedVarSet.insert(CI->getCapturedVar());
9668 else
9669 MappedVarSet.insert(nullptr);
9670
9671 if (CurInfo.BasePointers.empty())
9672 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9673
9674 // Generate correct mapping for variables captured by reference in
9675 // lambdas.
9676 if (CI->capturesVariable())
9677 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9678 CurInfo, LambdaPointers);
9679 }
9680 // We expect to have at least an element of information for this capture.
9681 assert(!CurInfo.BasePointers.empty() &&
9682 "Non-existing map pointer for capture!");
9683 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9684 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9685 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9686 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9687 "Inconsistent map information sizes!");
9688
9689 // We need to append the results of this capture to what we already have.
9690 CombinedInfo.append(CurInfo);
9691 }
9692 // Adjust MEMBER_OF flags for the lambdas captures.
9693 MEHandler.adjustMemberOfForLambdaCaptures(
9694 OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
9695 CombinedInfo.Pointers, CombinedInfo.Types);
9696}
9697static void
9698genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9699 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9700 llvm::OpenMPIRBuilder &OMPBuilder,
9701 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9702 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9703
9704 CodeGenModule &CGM = CGF.CGM;
9705 // Map any list items in a map clause that were not captures because they
9706 // weren't referenced within the construct.
9707 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);
9708
9709 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9710 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9711 };
9712 if (CGM.getCodeGenOpts().getDebugInfo() !=
9713 llvm::codegenoptions::NoDebugInfo) {
9714 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
9715 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
9716 FillInfoMap);
9717 }
9718}
9719
9720 static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9721 const CapturedStmt &CS,
9722 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9723 llvm::OpenMPIRBuilder &OMPBuilder,
9724 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9725 // Get mappable expression information.
9726 MappableExprsHandler MEHandler(D, CGF);
9727 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9728
9729 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9730 MappedVarSet, CombinedInfo);
9731 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
9732}
9733
9734template <typename ClauseTy>
9735static void
9736 emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9737 const OMPExecutableDirective &D,
9738 llvm::SmallVectorImpl<llvm::Value *> &Values) {
9739 const auto *C = D.getSingleClause<ClauseTy>();
9740 assert(!C->varlist_empty() &&
9741 "ompx_bare requires explicit num_teams and thread_limit");
9742 CodeGenFunction::RunCleanupsScope Scope(CGF);
9743 for (auto *E : C->varlist()) {
9744 llvm::Value *V = CGF.EmitScalarExpr(E);
9745 Values.push_back(
9746 CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
9747 }
9748}
9749
9750 static void emitTargetCallKernelLaunch(
9751 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9752 const OMPExecutableDirective &D,
9753 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9754 const CapturedStmt &CS, bool OffloadingMandatory,
9755 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9756 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9757 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9758 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9759 const OMPLoopDirective &D)>
9760 SizeEmitter,
9761 CodeGenFunction &CGF, CodeGenModule &CGM) {
9762 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9763
9764 // Fill up the arrays with all the captured variables.
9765 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9766 CGOpenMPRuntime::TargetDataInfo Info;
9767 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
9768
9769 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
9770 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
9771
9772 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9773 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9774 CGF.VoidPtrTy, CGM.getPointerAlign());
9775 InputInfo.PointersArray =
9776 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9777 InputInfo.SizesArray =
9778 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9779 InputInfo.MappersArray =
9780 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9781 MapTypesArray = Info.RTArgs.MapTypesArray;
9782 MapNamesArray = Info.RTArgs.MapNamesArray;
9783
9784 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9785 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9786 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9787 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9788 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9789
9790 if (IsReverseOffloading) {
9791 // Reverse offloading is not supported, so just execute on the host.
9792 // FIXME: This fallback solution is incorrect since it ignores the
9793 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9794 // assert here and ensure SEMA emits an error.
9795 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9796 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9797 return;
9798 }
9799
9800 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9801 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9802
9803 llvm::Value *BasePointersArray =
9804 InputInfo.BasePointersArray.emitRawPointer(CGF);
9805 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9806 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9807 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9808
9809 auto &&EmitTargetCallFallbackCB =
9810 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9811 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9812 -> llvm::OpenMPIRBuilder::InsertPointTy {
9813 CGF.Builder.restoreIP(IP);
9814 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9815 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9816 return CGF.Builder.saveIP();
9817 };
9818
9819 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
9820 SmallVector<llvm::Value *, 3> NumTeams;
9821 SmallVector<llvm::Value *, 3> NumThreads;
9822 if (IsBare) {
9823 emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
9824 emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
9825 NumThreads);
9826 } else {
9827 NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
9828 NumThreads.push_back(
9829 OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
9830 }
9831
9832 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
9833 llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9834 llvm::Value *NumIterations =
9835 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9836 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9837 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9838 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9839
9840 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9841 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9842 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9843
9844 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9845 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9846 DynCGGroupMem, HasNoWait);
9847
9848 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
9849 cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
9850 CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
9851 RTLoc, AllocaIP));
9852 CGF.Builder.restoreIP(AfterIP);
9853 };
9854
9855 if (RequiresOuterTask)
9856 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9857 else
9858 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9859}
9860
9861static void
9862emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9863 const OMPExecutableDirective &D,
9864 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9865 bool RequiresOuterTask, const CapturedStmt &CS,
9866 bool OffloadingMandatory, CodeGenFunction &CGF) {
9867
9868 // Notify that the host version must be executed.
9869 auto &&ElseGen =
9870 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9871 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9872 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9873 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9874 };
9875
9876 if (RequiresOuterTask) {
9877 CodeGenFunction::OMPTargetDataInfo InputInfo;
9878 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9879 } else {
9880 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9881 }
9882}
9883
9884 void CGOpenMPRuntime::emitTargetCall(
9885 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9886 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9887 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9888 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9889 const OMPLoopDirective &D)>
9890 SizeEmitter) {
9891 if (!CGF.HaveInsertPoint())
9892 return;
9893
9894 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9895 CGM.getLangOpts().OpenMPOffloadMandatory;
9896
9897 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9898
9899 const bool RequiresOuterTask =
9900 D.hasClausesOfKind<OMPDependClause>() ||
9901 D.hasClausesOfKind<OMPNowaitClause>() ||
9902 D.hasClausesOfKind<OMPInReductionClause>() ||
9903 (CGM.getLangOpts().OpenMP >= 51 &&
9904 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9905 D.hasClausesOfKind<OMPThreadLimitClause>());
9906 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9907 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9908 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9909 PrePostActionTy &) {
9910 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9911 };
9912 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9913
9914 CodeGenFunction::OMPTargetDataInfo InputInfo;
9915 llvm::Value *MapTypesArray = nullptr;
9916 llvm::Value *MapNamesArray = nullptr;
9917
9918 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9919 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9920 OutlinedFnID, &InputInfo, &MapTypesArray,
9921 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9922 PrePostActionTy &) {
9923 emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
9924 RequiresOuterTask, CS, OffloadingMandatory,
9925 Device, OutlinedFnID, InputInfo, MapTypesArray,
9926 MapNamesArray, SizeEmitter, CGF, CGM);
9927 };
9928
9929 auto &&TargetElseGen =
9930 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9931 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9932 emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
9933 CS, OffloadingMandatory, CGF);
9934 };
9935
9936 // If we have a target function ID it means that we need to support
9937 // offloading; otherwise, just execute on the host. We need to execute on
9938 // the host regardless of the conditional in the if clause if, e.g., the
9939 // user does not specify target triples.
9940 if (OutlinedFnID) {
9941 if (IfCond) {
9942 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9943 } else {
9944 RegionCodeGenTy ThenRCG(TargetThenGen);
9945 ThenRCG(CGF);
9946 }
9947 } else {
9948 RegionCodeGenTy ElseRCG(TargetElseGen);
9949 ElseRCG(CGF);
9950 }
9951}
9952
9953 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9954 StringRef ParentName) {
9955 if (!S)
9956 return;
9957
9958 // Codegen OMP target directives that offload compute to the device.
9959 bool RequiresDeviceCodegen =
9960 isa<OMPExecutableDirective>(S) &&
9961 isOpenMPTargetExecutionDirective(
9962 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9963
9964 if (RequiresDeviceCodegen) {
9965 const auto &E = *cast<OMPExecutableDirective>(S);
9966
9967 llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
9968 CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9969
9970 // Is this a target region that should not be emitted as an entry point?
9971 // If so, just signal that we are done with this target region.
9972 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9973 return;
9974
9975 switch (E.getDirectiveKind()) {
9976 case OMPD_target:
9977 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
9978 cast<OMPTargetDirective>(E));
9979 break;
9980 case OMPD_target_parallel:
9981 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9982 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9983 break;
9984 case OMPD_target_teams:
9985 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9986 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9987 break;
9988 case OMPD_target_teams_distribute:
9989 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9990 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9991 break;
9992 case OMPD_target_teams_distribute_simd:
9993 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9994 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9995 break;
9996 case OMPD_target_parallel_for:
9997 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9998 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9999 break;
10000 case OMPD_target_parallel_for_simd:
10001 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10002 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10003 break;
10004 case OMPD_target_simd:
10005 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10006 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10007 break;
10008 case OMPD_target_teams_distribute_parallel_for:
10009 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10010 CGM, ParentName,
10011 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10012 break;
10013 case OMPD_target_teams_distribute_parallel_for_simd:
10014 CodeGenFunction::
10015 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10016 CGM, ParentName,
10017 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10018 break;
10019 case OMPD_target_teams_loop:
10020 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
10021 CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
10022 break;
10023 case OMPD_target_parallel_loop:
10024 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
10025 CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
10026 break;
10027 case OMPD_parallel:
10028 case OMPD_for:
10029 case OMPD_parallel_for:
10030 case OMPD_parallel_master:
10031 case OMPD_parallel_sections:
10032 case OMPD_for_simd:
10033 case OMPD_parallel_for_simd:
10034 case OMPD_cancel:
10035 case OMPD_cancellation_point:
10036 case OMPD_ordered:
10037 case OMPD_threadprivate:
10038 case OMPD_allocate:
10039 case OMPD_task:
10040 case OMPD_simd:
10041 case OMPD_tile:
10042 case OMPD_unroll:
10043 case OMPD_sections:
10044 case OMPD_section:
10045 case OMPD_single:
10046 case OMPD_master:
10047 case OMPD_critical:
10048 case OMPD_taskyield:
10049 case OMPD_barrier:
10050 case OMPD_taskwait:
10051 case OMPD_taskgroup:
10052 case OMPD_atomic:
10053 case OMPD_flush:
10054 case OMPD_depobj:
10055 case OMPD_scan:
10056 case OMPD_teams:
10057 case OMPD_target_data:
10058 case OMPD_target_exit_data:
10059 case OMPD_target_enter_data:
10060 case OMPD_distribute:
10061 case OMPD_distribute_simd:
10062 case OMPD_distribute_parallel_for:
10063 case OMPD_distribute_parallel_for_simd:
10064 case OMPD_teams_distribute:
10065 case OMPD_teams_distribute_simd:
10066 case OMPD_teams_distribute_parallel_for:
10067 case OMPD_teams_distribute_parallel_for_simd:
10068 case OMPD_target_update:
10069 case OMPD_declare_simd:
10070 case OMPD_declare_variant:
10071 case OMPD_begin_declare_variant:
10072 case OMPD_end_declare_variant:
10073 case OMPD_declare_target:
10074 case OMPD_end_declare_target:
10075 case OMPD_declare_reduction:
10076 case OMPD_declare_mapper:
10077 case OMPD_taskloop:
10078 case OMPD_taskloop_simd:
10079 case OMPD_master_taskloop:
10080 case OMPD_master_taskloop_simd:
10081 case OMPD_parallel_master_taskloop:
10082 case OMPD_parallel_master_taskloop_simd:
10083 case OMPD_requires:
10084 case OMPD_metadirective:
10085 case OMPD_unknown:
10086 default:
10087 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10088 }
10089 return;
10090 }
10091
10092 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10093 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10094 return;
10095
10096 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10097 return;
10098 }
10099
10100 // If this is a lambda function, look into its body.
10101 if (const auto *L = dyn_cast<LambdaExpr>(S))
10102 S = L->getBody();
10103
10104 // Keep looking for target regions recursively.
10105 for (const Stmt *II : S->children())
10106 scanForTargetRegionsFunctions(II, ParentName);
10107}
10108
10109static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10110 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10111 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10112 if (!DevTy)
10113 return false;
10114 // Do not emit device_type(nohost) functions for the host.
10115 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10116 return true;
10117 // Do not emit device_type(host) functions for the device.
10118 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10119 return true;
10120 return false;
10121}
10122
10123 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10124 // If emitting code for the host, we do not process FD here. Instead we do
10125 // the normal code generation.
10126 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
10127 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10128 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10129 CGM.getLangOpts().OpenMPIsTargetDevice))
10130 return true;
10131 return false;
10132 }
10133
10134 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10135 // Try to detect target regions in the function.
10136 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10137 StringRef Name = CGM.getMangledName(GD);
10138 scanForTargetRegionsFunctions(FD->getBody(), Name);
10139 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10140 CGM.getLangOpts().OpenMPIsTargetDevice))
10141 return true;
10142 }
10143
10144 // Do not emit the function if it is not marked as declare target.
10145 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10146 AlreadyEmittedTargetDecls.count(VD) == 0;
10147}
10148
10149 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10150 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10151 CGM.getLangOpts().OpenMPIsTargetDevice))
10152 return true;
10153
10154 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
10155 return false;
10156
10157 // Check if there are Ctors/Dtors in this declaration and look for target
10158 // regions in it. We use the complete variant to produce the kernel name
10159 // mangling.
10160 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10161 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10162 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10163 StringRef ParentName =
10164 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10165 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10166 }
10167 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10168 StringRef ParentName =
10169 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10170 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10171 }
10172 }
10173
10174 // Do not emit the variable if it is not marked as declare target.
10175 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10176 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10177 cast<VarDecl>(GD.getDecl()));
10178 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10179 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10180 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10181 HasRequiresUnifiedSharedMemory)) {
10182 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10183 return true;
10184 }
10185 return false;
10186}
10187
10188 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10189 llvm::Constant *Addr) {
10190 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10191 !CGM.getLangOpts().OpenMPIsTargetDevice)
10192 return;
10193
10194 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10195 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10196
10197 // If this is an 'extern' declaration we defer to the canonical definition and
10198 // do not emit an offloading entry.
10199 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10200 VD->hasExternalStorage())
10201 return;
10202
10203 if (!Res) {
10204 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10205 // Register non-target variables being emitted in device code (debug info
10206 // may cause this).
10207 StringRef VarName = CGM.getMangledName(VD);
10208 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10209 }
10210 return;
10211 }
10212
10213 auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10214 auto LinkageForVariable = [&VD, this]() {
10215 return CGM.getLLVMLinkageVarDefinition(VD);
10216 };
10217
10218 std::vector<llvm::GlobalVariable *> GeneratedRefs;
10219 OMPBuilder.registerTargetGlobalVariable(
10220 convertCaptureClause(VD), convertDeviceClause(VD),
10221 VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
10222 VD->isExternallyVisible(),
10223 getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
10224 VD->getCanonicalDecl()->getBeginLoc()),
10225 CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
10226 CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
10227 CGM.getTypes().ConvertTypeForMem(
10228 CGM.getContext().getPointerType(VD->getType())),
10229 Addr);
10230
10231 for (auto *ref : GeneratedRefs)
10232 CGM.addCompilerUsedGlobal(ref);
10233}
10234
10235 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10236 if (isa<FunctionDecl>(GD.getDecl()) ||
10237 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10238 return emitTargetFunctions(GD);
10239
10240 return emitTargetGlobalVariable(GD);
10241}
10242
10243 void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10244 for (const VarDecl *VD : DeferredGlobalVariables) {
10245 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10246 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10247 if (!Res)
10248 continue;
10249 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10250 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10251 !HasRequiresUnifiedSharedMemory) {
10252 CGM.EmitGlobal(VD);
10253 } else {
10254 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10255 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10256 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10258 "Expected link clause or to clause with unified memory.");
10259 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10260 }
10261 }
10262}
10263
10264 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10265 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10266 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10267 " Expected target-based directive.");
10268}
10269
10270 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10271 for (const OMPClause *Clause : D->clauselists()) {
10272 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10273 HasRequiresUnifiedSharedMemory = true;
10274 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10275 } else if (const auto *AC =
10276 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10277 switch (AC->getAtomicDefaultMemOrderKind()) {
10278 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10279 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10280 break;
10281 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10282 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10283 break;
10284 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10285 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10286 break;
10287 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10288 break;
10289 }
10290 }
10291 }
10292}
10293
10294llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10296}
10297
10298 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10299 LangAS &AS) {
10300 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10301 return false;
10302 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10303 switch (A->getAllocatorType()) {
10304 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10305 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10306 // Not supported, fall back to the default mem space.
10307 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10308 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10309 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10310 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10311 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10312 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10313 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10314 AS = LangAS::Default;
10315 return true;
10316 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10317 llvm_unreachable("Expected predefined allocator for the variables with the "
10318 "static storage.");
10319 }
10320 return false;
10321}
10322
10323bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10324 return HasRequiresUnifiedSharedMemory;
10325}
10326
10327CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10328 CodeGenModule &CGM)
10329 : CGM(CGM) {
10330 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10331 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10332 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10333 }
10334}
10335
10336CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10337 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10338 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10339}
10340
10341bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10342 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10343 return true;
10344
10345 const auto *D = cast<FunctionDecl>(GD.getDecl());
10346 // Do not emit the function if it is marked as declare target, as it was
10347 // already emitted.
10348 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10349 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10350 if (auto *F = dyn_cast_or_null<llvm::Function>(
10351 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10352 return !F->isDeclaration();
10353 return false;
10354 }
10355 return true;
10356 }
10357
10358 return !AlreadyEmittedTargetDecls.insert(D).second;
10359}
10360
10361void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10362 const OMPExecutableDirective &D,
10363 SourceLocation Loc,
10364 llvm::Function *OutlinedFn,
10365 ArrayRef<llvm::Value *> CapturedVars) {
10366 if (!CGF.HaveInsertPoint())
10367 return;
10368
10369 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10370 CodeGenFunction::RunCleanupsScope Scope(CGF);
10371
10372 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10373 llvm::Value *Args[] = {
10374 RTLoc,
10375 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10376 OutlinedFn};
10377 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10378 RealArgs.append(std::begin(Args), std::end(Args));
10379 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10380
10381 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10382 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10383 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10384}
10385
10386void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10387 const Expr *NumTeams,
10388 const Expr *ThreadLimit,
10389 SourceLocation Loc) {
10390 if (!CGF.HaveInsertPoint())
10391 return;
10392
10393 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10394
10395 llvm::Value *NumTeamsVal =
10396 NumTeams
10397 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10398 CGF.CGM.Int32Ty, /* isSigned = */ true)
10399 : CGF.Builder.getInt32(0);
10400
10401 llvm::Value *ThreadLimitVal =
10402 ThreadLimit
10403 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10404 CGF.CGM.Int32Ty, /* isSigned = */ true)
10405 : CGF.Builder.getInt32(0);
10406
10407 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10408 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10409 ThreadLimitVal};
10410 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10411 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10412 PushNumTeamsArgs);
10413}
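// For example, a hypothetical '#pragma omp teams num_teams(4) thread_limit(8)'
// yields a call of the form (loc and gtid depend on the enclosing function):
//   __kmpc_push_num_teams(&loc, gtid, /*num_teams=*/4, /*thread_limit=*/8);
// An absent clause is lowered to 0, which the runtime treats as unspecified.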
10414
10415void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10416 const Expr *ThreadLimit,
10417 SourceLocation Loc) {
10418 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10419 llvm::Value *ThreadLimitVal =
10420 ThreadLimit
10421 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10422 CGF.CGM.Int32Ty, /* isSigned = */ true)
10423 : CGF.Builder.getInt32(0);
10424
10425 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10426 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10427 ThreadLimitVal};
10428 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10429 CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
10430 ThreadLimitArgs);
10431}
10432
10433void CGOpenMPRuntime::emitTargetDataCalls(
10434 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10435 const Expr *Device, const RegionCodeGenTy &CodeGen,
10436 CGOpenMPRuntime::TargetDataInfo &Info) {
10437 if (!CGF.HaveInsertPoint())
10438 return;
10439
10440 // Action used to replace the default codegen action and turn privatization
10441 // off.
10442 PrePostActionTy NoPrivAction;
10443
10444 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10445
10446 llvm::Value *IfCondVal = nullptr;
10447 if (IfCond)
10448 IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10449
10450 // Emit device ID if any.
10451 llvm::Value *DeviceID = nullptr;
10452 if (Device) {
10453 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10454 CGF.Int64Ty, /*isSigned=*/true);
10455 } else {
10456 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10457 }
10458
10459 // Fill up the arrays with all the mapped variables.
10460 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10461 auto GenMapInfoCB =
10462 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
10463 CGF.Builder.restoreIP(CodeGenIP);
10464 // Get map clause information.
10465 MappableExprsHandler MEHandler(D, CGF);
10466 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10467
10468 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
10469 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10470 };
10471 if (CGM.getCodeGenOpts().getDebugInfo() !=
10472 llvm::codegenoptions::NoDebugInfo) {
10473 CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
10474 llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
10475 FillInfoMap);
10476 }
10477
10478 return CombinedInfo;
10479 };
10480 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10481 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
10482 CGF.Builder.restoreIP(CodeGenIP);
10483 switch (BodyGenType) {
10484 case BodyGenTy::Priv:
10485 if (!Info.CaptureDeviceAddrMap.empty())
10486 CodeGen(CGF);
10487 break;
10488 case BodyGenTy::DupNoPriv:
10489 if (!Info.CaptureDeviceAddrMap.empty()) {
10490 CodeGen.setAction(NoPrivAction);
10491 CodeGen(CGF);
10492 }
10493 break;
10494 case BodyGenTy::NoPriv:
10495 if (Info.CaptureDeviceAddrMap.empty()) {
10496 CodeGen.setAction(NoPrivAction);
10497 CodeGen(CGF);
10498 }
10499 break;
10500 }
10501 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10502 CGF.Builder.GetInsertPoint());
10503 };
10504
10505 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10506 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
10507 Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10508 }
10509 };
10510
10511 auto CustomMapperCB = [&](unsigned int I) {
10512 llvm::Function *MFunc = nullptr;
10513 if (CombinedInfo.Mappers[I]) {
10514 Info.HasMapper = true;
10515 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
10516 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10517 }
10518 return MFunc;
10519 };
10520
10521 // Source location for the ident struct
10522 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10523
10524 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10525 CGF.AllocaInsertPt->getIterator());
10526 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10527 CGF.Builder.GetInsertPoint());
10528 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
10529 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
10530 cantFail(OMPBuilder.createTargetData(
10531 OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
10532 CustomMapperCB,
10533 /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
10534 CGF.Builder.restoreIP(AfterIP);
10535}
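// Note on the three body-generation phases used above: when the region
// captures device addresses (use_device_ptr/use_device_addr), the body is
// emitted with privatized pointers in the Priv phase, and a second,
// non-privatized copy may be emitted in the DupNoPriv phase when the body
// must be duplicated (e.g. for an 'if' clause); regions without captured
// device addresses are emitted once through the NoPriv path.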
10536
10537void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10538 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10539 const Expr *Device) {
10540 if (!CGF.HaveInsertPoint())
10541 return;
10542
10543 assert((isa<OMPTargetEnterDataDirective>(D) ||
10544 isa<OMPTargetExitDataDirective>(D) ||
10545 isa<OMPTargetUpdateDirective>(D)) &&
10546 "Expecting either target enter, exit data, or update directives.");
10547
10548 CodeGenFunction::OMPTargetDataInfo InputInfo;
10549 llvm::Value *MapTypesArray = nullptr;
10550 llvm::Value *MapNamesArray = nullptr;
10551 // Generate the code for the opening of the data environment.
10552 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10553 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10554 // Emit device ID if any.
10555 llvm::Value *DeviceID = nullptr;
10556 if (Device) {
10557 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10558 CGF.Int64Ty, /*isSigned=*/true);
10559 } else {
10560 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10561 }
10562
10563 // Emit the number of elements in the offloading arrays.
10564 llvm::Constant *PointerNum =
10565 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10566
10567 // Source location for the ident struct
10568 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10569
10570 SmallVector<llvm::Value *, 13> OffloadingArgs(
10571 {RTLoc, DeviceID, PointerNum,
10572 InputInfo.BasePointersArray.emitRawPointer(CGF),
10573 InputInfo.PointersArray.emitRawPointer(CGF),
10574 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10575 InputInfo.MappersArray.emitRawPointer(CGF)});
10576
10577 // Select the right runtime function call for each standalone
10578 // directive.
10579 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10580 RuntimeFunction RTLFn;
10581 switch (D.getDirectiveKind()) {
10582 case OMPD_target_enter_data:
10583 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10584 : OMPRTL___tgt_target_data_begin_mapper;
10585 break;
10586 case OMPD_target_exit_data:
10587 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10588 : OMPRTL___tgt_target_data_end_mapper;
10589 break;
10590 case OMPD_target_update:
10591 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10592 : OMPRTL___tgt_target_data_update_mapper;
10593 break;
10594 case OMPD_parallel:
10595 case OMPD_for:
10596 case OMPD_parallel_for:
10597 case OMPD_parallel_master:
10598 case OMPD_parallel_sections:
10599 case OMPD_for_simd:
10600 case OMPD_parallel_for_simd:
10601 case OMPD_cancel:
10602 case OMPD_cancellation_point:
10603 case OMPD_ordered:
10604 case OMPD_threadprivate:
10605 case OMPD_allocate:
10606 case OMPD_task:
10607 case OMPD_simd:
10608 case OMPD_tile:
10609 case OMPD_unroll:
10610 case OMPD_sections:
10611 case OMPD_section:
10612 case OMPD_single:
10613 case OMPD_master:
10614 case OMPD_critical:
10615 case OMPD_taskyield:
10616 case OMPD_barrier:
10617 case OMPD_taskwait:
10618 case OMPD_taskgroup:
10619 case OMPD_atomic:
10620 case OMPD_flush:
10621 case OMPD_depobj:
10622 case OMPD_scan:
10623 case OMPD_teams:
10624 case OMPD_target_data:
10625 case OMPD_distribute:
10626 case OMPD_distribute_simd:
10627 case OMPD_distribute_parallel_for:
10628 case OMPD_distribute_parallel_for_simd:
10629 case OMPD_teams_distribute:
10630 case OMPD_teams_distribute_simd:
10631 case OMPD_teams_distribute_parallel_for:
10632 case OMPD_teams_distribute_parallel_for_simd:
10633 case OMPD_declare_simd:
10634 case OMPD_declare_variant:
10635 case OMPD_begin_declare_variant:
10636 case OMPD_end_declare_variant:
10637 case OMPD_declare_target:
10638 case OMPD_end_declare_target:
10639 case OMPD_declare_reduction:
10640 case OMPD_declare_mapper:
10641 case OMPD_taskloop:
10642 case OMPD_taskloop_simd:
10643 case OMPD_master_taskloop:
10644 case OMPD_master_taskloop_simd:
10645 case OMPD_parallel_master_taskloop:
10646 case OMPD_parallel_master_taskloop_simd:
10647 case OMPD_target:
10648 case OMPD_target_simd:
10649 case OMPD_target_teams_distribute:
10650 case OMPD_target_teams_distribute_simd:
10651 case OMPD_target_teams_distribute_parallel_for:
10652 case OMPD_target_teams_distribute_parallel_for_simd:
10653 case OMPD_target_teams:
10654 case OMPD_target_parallel:
10655 case OMPD_target_parallel_for:
10656 case OMPD_target_parallel_for_simd:
10657 case OMPD_requires:
10658 case OMPD_metadirective:
10659 case OMPD_unknown:
10660 default:
10661 llvm_unreachable("Unexpected standalone target data directive.");
10662 break;
10663 }
10664 if (HasNowait) {
10665 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10666 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10667 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
10668 OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
10669 }
10670 CGF.EmitRuntimeCall(
10671 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10672 OffloadingArgs);
10673 };
10674
10675 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10676 &MapNamesArray](CodeGenFunction &CGF,
10677 PrePostActionTy &) {
10678 // Fill up the arrays with all the mapped variables.
10679 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10680 CGOpenMPRuntime::TargetDataInfo Info;
10681 MappableExprsHandler MEHandler(D, CGF);
10682 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
10683 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10684 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10685
10686 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10687 D.hasClausesOfKind<OMPNowaitClause>();
10688
10689 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10690 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10691 CGF.VoidPtrTy, CGM.getPointerAlign());
10692 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10693 CGM.getPointerAlign());
10694 InputInfo.SizesArray =
10695 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10696 InputInfo.MappersArray =
10697 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10698 MapTypesArray = Info.RTArgs.MapTypesArray;
10699 MapNamesArray = Info.RTArgs.MapNamesArray;
10700 if (RequiresOuterTask)
10701 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10702 else
10703 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10704 };
10705
10706 if (IfCond) {
10707 emitIfClause(CGF, IfCond, TargetThenGen,
10708 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10709 } else {
10710 RegionCodeGenTy ThenRCG(TargetThenGen);
10711 ThenRCG(CGF);
10712 }
10713}
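// For example, a hypothetical '#pragma omp target update to(x) nowait'
// selects __tgt_target_data_update_nowait_mapper. The four extra null
// operands appended for the nowait variants correspond to the dependence
// counts/lists expected by the *_nowait_mapper entry points, left empty here.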
10714
10715namespace {
10716 /// Kind of parameter in a function with 'declare simd' directive.
10717enum ParamKindTy {
10718 Linear,
10719 LinearRef,
10720 LinearUVal,
10721 LinearVal,
10722 Uniform,
10723 Vector,
10724};
10725/// Attribute set of the parameter.
10726struct ParamAttrTy {
10727 ParamKindTy Kind = Vector;
10728 llvm::APSInt StrideOrArg;
10729 llvm::APSInt Alignment;
10730 bool HasVarStride = false;
10731};
10732} // namespace
10733
10734static unsigned evaluateCDTSize(const FunctionDecl *FD,
10735 ArrayRef<ParamAttrTy> ParamAttrs) {
10736 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10737 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
10738 // argument of that clause. The VLEN value must be a power of 2.
10739 // Otherwise the notion of the function's "characteristic data type" (CDT)
10740 // is used to compute the vector length.
10741 // CDT is defined in the following order:
10742 // a) For non-void function, the CDT is the return type.
10743 // b) If the function has any non-uniform, non-linear parameters, then the
10744 // CDT is the type of the first such parameter.
10745 // c) If the CDT determined by a) or b) above is struct, union, or class
10746 // type which is pass-by-value (except for the type that maps to the
10747 // built-in complex data type), the characteristic data type is int.
10748 // d) If none of the above three cases is applicable, the CDT is int.
10749 // The VLEN is then determined based on the CDT and the size of vector
10750 // register of that ISA for which current vector version is generated. The
10751 // VLEN is computed using the formula below:
10752 // VLEN = sizeof(vector_register) / sizeof(CDT),
10753 // where the vector register size is specified in section 3.2.1 Registers
10754 // and the Stack Frame of the original AMD64 ABI document.
10755 QualType RetType = FD->getReturnType();
10756 if (RetType.isNull())
10757 return 0;
10758 ASTContext &C = FD->getASTContext();
10759 QualType CDT;
10760 if (!RetType.isNull() && !RetType->isVoidType()) {
10761 CDT = RetType;
10762 } else {
10763 unsigned Offset = 0;
10764 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
10765 if (ParamAttrs[Offset].Kind == Vector)
10766 CDT = C.getPointerType(C.getCanonicalTagType(MD->getParent()));
10767 ++Offset;
10768 }
10769 if (CDT.isNull()) {
10770 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10771 if (ParamAttrs[I + Offset].Kind == Vector) {
10772 CDT = FD->getParamDecl(I)->getType();
10773 break;
10774 }
10775 }
10776 }
10777 }
10778 if (CDT.isNull())
10779 CDT = C.IntTy;
10780 CDT = CDT->getCanonicalTypeUnqualified();
10781 if (CDT->isRecordType() || CDT->isUnionType())
10782 CDT = C.IntTy;
10783 return C.getTypeSize(CDT);
10784}
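// Worked example: for 'double foo(double x)' the CDT is double, so
// C.getTypeSize(CDT) == 64, and a 128-bit vector register gives
// VLEN = 128 / 64 = 2.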
10785
10786/// Mangle the parameter part of the vector function name according to
10787/// their OpenMP classification. The mangling function is defined in
10788/// section 4.5 of the AAVFABI(2021Q1).
10789static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10790 SmallString<256> Buffer;
10791 llvm::raw_svector_ostream Out(Buffer);
10792 for (const auto &ParamAttr : ParamAttrs) {
10793 switch (ParamAttr.Kind) {
10794 case Linear:
10795 Out << 'l';
10796 break;
10797 case LinearRef:
10798 Out << 'R';
10799 break;
10800 case LinearUVal:
10801 Out << 'U';
10802 break;
10803 case LinearVal:
10804 Out << 'L';
10805 break;
10806 case Uniform:
10807 Out << 'u';
10808 break;
10809 case Vector:
10810 Out << 'v';
10811 break;
10812 }
10813 if (ParamAttr.HasVarStride)
10814 Out << "s" << ParamAttr.StrideOrArg;
10815 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10816 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10817 // Don't print the step value if it is not present or if it is
10818 // equal to 1.
10819 if (ParamAttr.StrideOrArg < 0)
10820 Out << 'n' << -ParamAttr.StrideOrArg;
10821 else if (ParamAttr.StrideOrArg != 1)
10822 Out << ParamAttr.StrideOrArg;
10823 }
10824
10825 if (!!ParamAttr.Alignment)
10826 Out << 'a' << ParamAttr.Alignment;
10827 }
10828
10829 return std::string(Out.str());
10830}
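// Example: parameters (uniform, linear with step 2, vector aligned to 16)
// mangle to "ul2va16"; a linear parameter with unit step mangles to just "l",
// since a step of 1 is omitted.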
10831
10832static void
10833emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10834 const llvm::APSInt &VLENVal,
10835 ArrayRef<ParamAttrTy> ParamAttrs,
10836 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10837 struct ISADataTy {
10838 char ISA;
10839 unsigned VecRegSize;
10840 };
10841 ISADataTy ISAData[] = {
10842 {
10843 'b', 128
10844 }, // SSE
10845 {
10846 'c', 256
10847 }, // AVX
10848 {
10849 'd', 256
10850 }, // AVX2
10851 {
10852 'e', 512
10853 }, // AVX512
10854 };
10855 llvm::SmallVector<char, 2> Masked;
10856 switch (State) {
10857 case OMPDeclareSimdDeclAttr::BS_Undefined:
10858 Masked.push_back('N');
10859 Masked.push_back('M');
10860 break;
10861 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10862 Masked.push_back('N');
10863 break;
10864 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10865 Masked.push_back('M');
10866 break;
10867 }
10868 for (char Mask : Masked) {
10869 for (const ISADataTy &Data : ISAData) {
10870 SmallString<256> Buffer;
10871 llvm::raw_svector_ostream Out(Buffer);
10872 Out << "_ZGV" << Data.ISA << Mask;
10873 if (!VLENVal) {
10874 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10875 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10876 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
10877 } else {
10878 Out << VLENVal;
10879 }
10880 Out << mangleVectorParameters(ParamAttrs);
10881 Out << '_' << Fn->getName();
10882 Fn->addFnAttr(Out.str());
10883 }
10884 }
10885}
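// Example: for 'double foo(double x)' declared simd notinbranch with no
// simdlen and LLVM name "foo", the attributes added are "_ZGVbN2v_foo"
// (SSE, VLEN = 128/64), "_ZGVcN4v_foo" (AVX), "_ZGVdN4v_foo" (AVX2) and
// "_ZGVeN8v_foo" (AVX512).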
10886
10887// These are the functions needed to mangle the names of the
10888// vector functions generated by the compiler, according to the rules
10889// defined in the "Vector Function ABI specifications for AArch64",
10890// available at
10891// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10892
10893/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10894static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10895 QT = QT.getCanonicalType();
10896
10897 if (QT->isVoidType())
10898 return false;
10899
10900 if (Kind == ParamKindTy::Uniform)
10901 return false;
10902
10903 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10904 return false;
10905
10906 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10907 !QT->isReferenceType())
10908 return false;
10909
10910 return true;
10911}
10912
10913/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10914static bool getAArch64PBV(QualType QT, ASTContext &C) {
10915 QT = QT.getCanonicalType();
10916 unsigned Size = C.getTypeSize(QT);
10917
10918 // Only scalar and complex types at most 16 bytes wide set PBV to true.
10919 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10920 return false;
10921
10922 if (QT->isFloatingType())
10923 return true;
10924
10925 if (QT->isIntegerType())
10926 return true;
10927
10928 if (QT->isPointerType())
10929 return true;
10930
10931 // TODO: Add support for complex types (section 3.1.2, item 2).
10932
10933 return false;
10934}
10935
10936/// Computes the lane size (LS) of a return type or of an input parameter,
10937/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10938/// TODO: Add support for references, section 3.2.1, item 1.
10939static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10940 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10941 QualType PTy = QT.getCanonicalType()->getPointeeType();
10942 if (getAArch64PBV(PTy, C))
10943 return C.getTypeSize(PTy);
10944 }
10945 if (getAArch64PBV(QT, C))
10946 return C.getTypeSize(QT);
10947
10948 return C.getTypeSize(C.getUIntPtrType());
10949}
10950
10951// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10952// signature of the scalar function, as defined in 3.2.2 of the
10953// AAVFABI.
10954static std::tuple<unsigned, unsigned, bool>
10955getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10956 QualType RetType = FD->getReturnType().getCanonicalType();
10957
10958 ASTContext &C = FD->getASTContext();
10959
10960 bool OutputBecomesInput = false;
10961
10962 llvm::SmallVector<unsigned, 8> Sizes;
10963 if (!RetType->isVoidType()) {
10964 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
10965 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10966 OutputBecomesInput = true;
10967 }
10968 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10969 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10970 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10971 }
10972
10973 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10974 // The LS of a function parameter / return value can only be a power
10975 // of 2, starting from 8 bits, up to 128.
10976 assert(llvm::all_of(Sizes,
10977 [](unsigned Size) {
10978 return Size == 8 || Size == 16 || Size == 32 ||
10979 Size == 64 || Size == 128;
10980 }) &&
10981 "Invalid size");
10982
10983 return std::make_tuple(*llvm::min_element(Sizes), *llvm::max_element(Sizes),
10984 OutputBecomesInput);
10985}
10986
10987// Function used to add the attribute. The parameter `VLEN` is
10988// templated to allow the use of "x" when targeting scalable functions
10989// for SVE.
10990template <typename T>
10991static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10992 char ISA, StringRef ParSeq,
10993 StringRef MangledName, bool OutputBecomesInput,
10994 llvm::Function *Fn) {
10995 SmallString<256> Buffer;
10996 llvm::raw_svector_ostream Out(Buffer);
10997 Out << Prefix << ISA << LMask << VLEN;
10998 if (OutputBecomesInput)
10999 Out << "v";
11000 Out << ParSeq << "_" << MangledName;
11001 Fn->addFnAttr(Out.str());
11002}
11003
11004// Helper function to generate the Advanced SIMD names depending on
11005// the value of the NDS when simdlen is not present.
11006static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11007 StringRef Prefix, char ISA,
11008 StringRef ParSeq, StringRef MangledName,
11009 bool OutputBecomesInput,
11010 llvm::Function *Fn) {
11011 switch (NDS) {
11012 case 8:
11013 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11014 OutputBecomesInput, Fn);
11015 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11016 OutputBecomesInput, Fn);
11017 break;
11018 case 16:
11019 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11020 OutputBecomesInput, Fn);
11021 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11022 OutputBecomesInput, Fn);
11023 break;
11024 case 32:
11025 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11026 OutputBecomesInput, Fn);
11027 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11028 OutputBecomesInput, Fn);
11029 break;
11030 case 64:
11031 case 128:
11032 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11033 OutputBecomesInput, Fn);
11034 break;
11035 default:
11036 llvm_unreachable("Scalar type is too wide.");
11037 }
11038}
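// E.g. an NDS of 32 (a float return value or parameter) produces both the
// 2-lane (64-bit) and 4-lane (128-bit) Advanced SIMD variants.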
11039
11040/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11041static void emitAArch64DeclareSimdFunction(
11042 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11043 ArrayRef<ParamAttrTy> ParamAttrs,
11044 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11045 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11046
11047 // Get basic data for building the vector signature.
11048 const auto Data = getNDSWDS(FD, ParamAttrs);
11049 const unsigned NDS = std::get<0>(Data);
11050 const unsigned WDS = std::get<1>(Data);
11051 const bool OutputBecomesInput = std::get<2>(Data);
11052
11053 // Check the values provided via `simdlen` by the user.
11054 // 1. A `simdlen(1)` doesn't produce vector signatures,
11055 if (UserVLEN == 1) {
11056 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11057 DiagnosticsEngine::Warning,
11058 "The clause simdlen(1) has no effect when targeting aarch64.");
11059 CGM.getDiags().Report(SLoc, DiagID);
11060 return;
11061 }
11062
11063 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11064 // Advanced SIMD output.
11065 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11066 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11067 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11068 "power of 2 when targeting Advanced SIMD.");
11069 CGM.getDiags().Report(SLoc, DiagID);
11070 return;
11071 }
11072
11073 // 3. Section 3.4.1. SVE fixed length must obey the architectural
11074 // limits.
11075 if (ISA == 's' && UserVLEN != 0) {
11076 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11077 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11078 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11079 "lanes in the architectural constraints "
11080 "for SVE (min is 128-bit, max is "
11081 "2048-bit, by steps of 128-bit)");
11082 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11083 return;
11084 }
11085 }
11086
11087 // Sort out parameter sequence.
11088 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11089 StringRef Prefix = "_ZGV";
11090 // Generate simdlen from user input (if any).
11091 if (UserVLEN) {
11092 if (ISA == 's') {
11093 // SVE generates only a masked function.
11094 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11095 OutputBecomesInput, Fn);
11096 } else {
11097 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11098 // Advanced SIMD generates one or two functions, depending on
11099 // the `[not]inbranch` clause.
11100 switch (State) {
11101 case OMPDeclareSimdDeclAttr::BS_Undefined:
11102 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11103 OutputBecomesInput, Fn);
11104 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11105 OutputBecomesInput, Fn);
11106 break;
11107 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11108 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11109 OutputBecomesInput, Fn);
11110 break;
11111 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11112 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11113 OutputBecomesInput, Fn);
11114 break;
11115 }
11116 }
11117 } else {
11118 // If no user simdlen is provided, follow the AAVFABI rules for
11119 // generating the vector length.
11120 if (ISA == 's') {
11121 // SVE, section 3.4.1, item 1.
11122 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11123 OutputBecomesInput, Fn);
11124 } else {
11125 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11126 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11127 // two vector names depending on the use of the clause
11128 // `[not]inbranch`.
11129 switch (State) {
11130 case OMPDeclareSimdDeclAttr::BS_Undefined:
11131 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11132 OutputBecomesInput, Fn);
11133 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11134 OutputBecomesInput, Fn);
11135 break;
11136 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11137 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11138 OutputBecomesInput, Fn);
11139 break;
11140 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11141 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11142 OutputBecomesInput, Fn);
11143 break;
11144 }
11145 }
11146 }
11147}
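// Example: for 'float foo(float x)' (LLVM name "foo") with no simdlen and
// notinbranch, NDS = 32, so NEON ('n') gets "_ZGVnN2v_foo" and
// "_ZGVnN4v_foo", while SVE ('s') gets the scalable masked form
// "_ZGVsMxv_foo".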
11148
11149void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11150 llvm::Function *Fn) {
11151 ASTContext &C = CGM.getContext();
11152 FD = FD->getMostRecentDecl();
11153 while (FD) {
11154 // Map params to their positions in function decl.
11155 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11156 if (isa<CXXMethodDecl>(FD))
11157 ParamPositions.try_emplace(FD, 0);
11158 unsigned ParamPos = ParamPositions.size();
11159 for (const ParmVarDecl *P : FD->parameters()) {
11160 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11161 ++ParamPos;
11162 }
11163 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11164 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11165 // Mark uniform parameters.
11166 for (const Expr *E : Attr->uniforms()) {
11167 E = E->IgnoreParenImpCasts();
11168 unsigned Pos;
11169 if (isa<CXXThisExpr>(E)) {
11170 Pos = ParamPositions[FD];
11171 } else {
11172 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11173 ->getCanonicalDecl();
11174 auto It = ParamPositions.find(PVD);
11175 assert(It != ParamPositions.end() && "Function parameter not found");
11176 Pos = It->second;
11177 }
11178 ParamAttrs[Pos].Kind = Uniform;
11179 }
11180 // Get alignment info.
11181 auto *NI = Attr->alignments_begin();
11182 for (const Expr *E : Attr->aligneds()) {
11183 E = E->IgnoreParenImpCasts();
11184 unsigned Pos;
11185 QualType ParmTy;
11186 if (isa<CXXThisExpr>(E)) {
11187 Pos = ParamPositions[FD];
11188 ParmTy = E->getType();
11189 } else {
11190 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11191 ->getCanonicalDecl();
11192 auto It = ParamPositions.find(PVD);
11193 assert(It != ParamPositions.end() && "Function parameter not found");
11194 Pos = It->second;
11195 ParmTy = PVD->getType();
11196 }
11197 ParamAttrs[Pos].Alignment =
11198 (*NI)
11199 ? (*NI)->EvaluateKnownConstInt(C)
11200 : llvm::APSInt::getUnsigned(
11201 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11202 .getQuantity());
11203 ++NI;
11204 }
11205 // Mark linear parameters.
11206 auto *SI = Attr->steps_begin();
11207 auto *MI = Attr->modifiers_begin();
11208 for (const Expr *E : Attr->linears()) {
11209 E = E->IgnoreParenImpCasts();
11210 unsigned Pos;
11211 bool IsReferenceType = false;
11212 // Rescaling factor needed to compute the linear parameter
11213 // value in the mangled name.
11214 unsigned PtrRescalingFactor = 1;
11215 if (isa<CXXThisExpr>(E)) {
11216 Pos = ParamPositions[FD];
11217 auto *P = cast<PointerType>(E->getType());
11218 PtrRescalingFactor = CGM.getContext()
11219 .getTypeSizeInChars(P->getPointeeType())
11220 .getQuantity();
11221 } else {
11222 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11223 ->getCanonicalDecl();
11224 auto It = ParamPositions.find(PVD);
11225 assert(It != ParamPositions.end() && "Function parameter not found");
11226 Pos = It->second;
11227 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11228 PtrRescalingFactor = CGM.getContext()
11229 .getTypeSizeInChars(P->getPointeeType())
11230 .getQuantity();
11231 else if (PVD->getType()->isReferenceType()) {
11232 IsReferenceType = true;
11233 PtrRescalingFactor =
11234 CGM.getContext()
11235 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11236 .getQuantity();
11237 }
11238 }
11239 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11240 if (*MI == OMPC_LINEAR_ref)
11241 ParamAttr.Kind = LinearRef;
11242 else if (*MI == OMPC_LINEAR_uval)
11243 ParamAttr.Kind = LinearUVal;
11244 else if (IsReferenceType)
11245 ParamAttr.Kind = LinearVal;
11246 else
11247 ParamAttr.Kind = Linear;
11248 // Assuming a stride of 1, for `linear` without modifiers.
11249 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11250 if (*SI) {
11251 Expr::EvalResult Result;
11252 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11253 if (const auto *DRE =
11254 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11255 if (const auto *StridePVD =
11256 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11257 ParamAttr.HasVarStride = true;
11258 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11259 assert(It != ParamPositions.end() &&
11260 "Function parameter not found");
11261 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11262 }
11263 }
11264 } else {
11265 ParamAttr.StrideOrArg = Result.Val.getInt();
11266 }
11267 }
11268 // If we are using a linear clause on a pointer, we need to
11269 // rescale the value of linear_step with the byte size of the
11270 // pointee type.
11271 if (!ParamAttr.HasVarStride &&
11272 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11273 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11274 ++SI;
11275 ++MI;
11276 }
11277 llvm::APSInt VLENVal;
11278 SourceLocation ExprLoc;
11279 const Expr *VLENExpr = Attr->getSimdlen();
11280 if (VLENExpr) {
11281 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11282 ExprLoc = VLENExpr->getExprLoc();
11283 }
11284 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11285 if (CGM.getTriple().isX86()) {
11286 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11287 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11288 unsigned VLEN = VLENVal.getExtValue();
11289 StringRef MangledName = Fn->getName();
11290 if (CGM.getTarget().hasFeature("sve"))
11291 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11292 MangledName, 's', 128, Fn, ExprLoc);
11293 else if (CGM.getTarget().hasFeature("neon"))
11294 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11295 MangledName, 'n', 128, Fn, ExprLoc);
11296 }
11297 }
11298 FD = FD->getPreviousDecl();
11299 }
11300}
11301
11302namespace {
11303/// Cleanup action for doacross support.
11304class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11305public:
11306 static const int DoacrossFinArgs = 2;
11307
11308private:
11309 llvm::FunctionCallee RTLFn;
11310 llvm::Value *Args[DoacrossFinArgs];
11311
11312public:
11313 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11314 ArrayRef<llvm::Value *> CallArgs)
11315 : RTLFn(RTLFn) {
11316 assert(CallArgs.size() == DoacrossFinArgs);
11317 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11318 }
11319 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11320 if (!CGF.HaveInsertPoint())
11321 return;
11322 CGF.EmitRuntimeCall(RTLFn, Args);
11323 }
11324};
11325} // namespace
11326
11327void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11328 const OMPLoopDirective &D,
11329 ArrayRef<Expr *> NumIterations) {
11330 if (!CGF.HaveInsertPoint())
11331 return;
11332
11333 ASTContext &C = CGM.getContext();
11334 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11335 RecordDecl *RD;
11336 if (KmpDimTy.isNull()) {
11337 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11338 // kmp_int64 lo; // lower
11339 // kmp_int64 up; // upper
11340 // kmp_int64 st; // stride
11341 // };
11342 RD = C.buildImplicitRecord("kmp_dim");
11343 RD->startDefinition();
11344 addFieldToRecordDecl(C, RD, Int64Ty);
11345 addFieldToRecordDecl(C, RD, Int64Ty);
11346 addFieldToRecordDecl(C, RD, Int64Ty);
11347 RD->completeDefinition();
11348 KmpDimTy = C.getCanonicalTagType(RD);
11349 } else {
11350 RD = KmpDimTy->castAsRecordDecl();
11351 }
11352 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11353 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11354 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
11355
11356 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11357 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11358 enum { LowerFD = 0, UpperFD, StrideFD };
11359 // Fill dims with data.
11360 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11361 LValue DimsLVal = CGF.MakeAddrLValue(
11362 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11363 // dims.upper = num_iterations;
11364 LValue UpperLVal = CGF.EmitLValueForField(
11365 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11366 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11367 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11368 Int64Ty, NumIterations[I]->getExprLoc());
11369 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11370 // dims.stride = 1;
11371 LValue StrideLVal = CGF.EmitLValueForField(
11372 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11373 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11374 StrideLVal);
11375 }
11376
11377 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11378 // kmp_int32 num_dims, struct kmp_dim * dims);
11379 llvm::Value *Args[] = {
11380 emitUpdateLocation(CGF, D.getBeginLoc()),
11381 getThreadID(CGF, D.getBeginLoc()),
11382 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11383 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11384 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
11385 CGM.VoidPtrTy)};
11386
11387 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11388 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11389 CGF.EmitRuntimeCall(RTLFn, Args);
11390 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11391 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11392 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11393 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11394 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11395 llvm::ArrayRef(FiniArgs));
11396}
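// For example, for a loop annotated with 'ordered(1)' and N iterations this
// is roughly (loc and gtid depend on the enclosing function):
//   kmp_dim dims = {/*lo=*/0, /*up=*/N, /*st=*/1};
//   __kmpc_doacross_init(&loc, gtid, /*num_dims=*/1, &dims);
// with __kmpc_doacross_fini(&loc, gtid) pushed as a cleanup for region exit.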
11397
11398template <typename T>
11399static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11400 const T *C, llvm::Value *ULoc,
11401 llvm::Value *ThreadID) {
11402 QualType Int64Ty =
11403 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11404 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11405 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11406 Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
11407 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11408 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11409 const Expr *CounterVal = C->getLoopData(I);
11410 assert(CounterVal);
11411 llvm::Value *CntVal = CGF.EmitScalarConversion(
11412 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11413 CounterVal->getExprLoc());
11414 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11415 /*Volatile=*/false, Int64Ty);
11416 }
11417 llvm::Value *Args[] = {
11418 ULoc, ThreadID,
11419 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
11420 llvm::FunctionCallee RTLFn;
11421 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11422 OMPDoacrossKind<T> ODK;
11423 if (ODK.isSource(C)) {
11424 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11425 OMPRTL___kmpc_doacross_post);
11426 } else {
11427 assert(ODK.isSink(C) && "Expect sink modifier.");
11428 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11429 OMPRTL___kmpc_doacross_wait);
11430 }
11431 CGF.EmitRuntimeCall(RTLFn, Args);
11432}
11433
11434void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11435 const OMPDependClause *C) {
11436 EmitDoacrossOrdered<OMPDependClause>(
11437 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11438 getThreadID(CGF, C->getBeginLoc()));
11439}
11440
11441void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11442 const OMPDoacrossClause *C) {
11443 EmitDoacrossOrdered<OMPDoacrossClause>(
11444 CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
11445 getThreadID(CGF, C->getBeginLoc()));
11446}
11447
11448void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11449 llvm::FunctionCallee Callee,
11450 ArrayRef<llvm::Value *> Args) const {
11451 assert(Loc.isValid() && "Outlined function call location must be valid.");
11452 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11453
11454 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11455 if (Fn->doesNotThrow()) {
11456 CGF.EmitNounwindRuntimeCall(Fn, Args);
11457 return;
11458 }
11459 }
11460 CGF.EmitRuntimeCall(Callee, Args);
11461}
11462
11463void CGOpenMPRuntime::emitOutlinedFunctionCall(
11464 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11465 ArrayRef<llvm::Value *> Args) const {
11466 emitCall(CGF, Loc, OutlinedFn, Args);
11467}
11468
11469void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11470 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11471 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11472 HasEmittedDeclareTargetRegion = true;
11473}
11474
11475Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11476 const VarDecl *NativeParam,
11477 const VarDecl *TargetParam) const {
11478 return CGF.GetAddrOfLocalVar(NativeParam);
11479}
11480
11481/// Return allocator value from expression, or return a null allocator (default
11482/// when no allocator specified).
11483static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11484 const Expr *Allocator) {
11485 llvm::Value *AllocVal;
11486 if (Allocator) {
11487 AllocVal = CGF.EmitScalarExpr(Allocator);
11488 // According to the standard, the original allocator type is an enum
11489 // (integer). Convert to pointer type, if required.
11490 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11491 CGF.getContext().VoidPtrTy,
11492 Allocator->getExprLoc());
11493 } else {
11494 // If no allocator specified, it defaults to the null allocator.
11495 AllocVal = llvm::Constant::getNullValue(
11496 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11497 }
11498 return AllocVal;
11499}
11500
11501/// Return the alignment from an allocate directive if present.
11502static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11503 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11504
11505 if (!AllocateAlignment)
11506 return nullptr;
11507
11508 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11509}
11510
11511Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11512 const VarDecl *VD) {
11513 if (!VD)
11514 return Address::invalid();
11515 Address UntiedAddr = Address::invalid();
11516 Address UntiedRealAddr = Address::invalid();
11517 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11518 if (It != FunctionToUntiedTaskStackMap.end()) {
11519 const UntiedLocalVarsAddressesMap &UntiedData =
11520 UntiedLocalVarsStack[It->second];
11521 auto I = UntiedData.find(VD);
11522 if (I != UntiedData.end()) {
11523 UntiedAddr = I->second.first;
11524 UntiedRealAddr = I->second.second;
11525 }
11526 }
11527 const VarDecl *CVD = VD->getCanonicalDecl();
11528 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11529 // Use the default allocation.
11530 if (!isAllocatableDecl(VD))
11531 return UntiedAddr;
11532 llvm::Value *Size;
11533 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11534 if (CVD->getType()->isVariablyModifiedType()) {
11535 Size = CGF.getTypeSize(CVD->getType());
11536 // Align the size: ((size + align - 1) / align) * align
11537 Size = CGF.Builder.CreateNUWAdd(
11538 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11539 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11540 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
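// E.g. size = 10, align = 8: (10 + 7) / 8 * 8 == 16.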
11541 } else {
11542 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11543 Size = CGM.getSize(Sz.alignTo(Align));
11544 }
11545 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11546 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11547 const Expr *Allocator = AA->getAllocator();
11548 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11549 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11550 SmallVector<llvm::Value *, 4> Args;
11551 Args.push_back(ThreadID);
11552 if (Alignment)
11553 Args.push_back(Alignment);
11554 Args.push_back(Size);
11555 Args.push_back(AllocVal);
11556 llvm::omp::RuntimeFunction FnID =
11557 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11558 llvm::Value *Addr = CGF.EmitRuntimeCall(
11559 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11560 getName({CVD->getName(), ".void.addr"}));
11561 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11562 CGM.getModule(), OMPRTL___kmpc_free);
11563 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11564 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11565 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11566 if (UntiedAddr.isValid())
11567 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11568
11569 // Cleanup action for allocate support.
11570 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11571 llvm::FunctionCallee RTLFn;
11572 SourceLocation::UIntTy LocEncoding;
11573 Address Addr;
11574 const Expr *AllocExpr;
11575
11576 public:
11577 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11578 SourceLocation::UIntTy LocEncoding, Address Addr,
11579 const Expr *AllocExpr)
11580 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11581 AllocExpr(AllocExpr) {}
11582 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11583 if (!CGF.HaveInsertPoint())
11584 return;
11585 llvm::Value *Args[3];
11586 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11587 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11588 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11589 Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
11590 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11591 Args[2] = AllocVal;
11592 CGF.EmitRuntimeCall(RTLFn, Args);
11593 }
11594 };
11595 Address VDAddr =
11596 UntiedRealAddr.isValid()
11597 ? UntiedRealAddr
11598 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11599 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11600 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11601 VDAddr, Allocator);
11602 if (UntiedRealAddr.isValid())
11603 if (auto *Region =
11604 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11605 Region->emitUntiedSwitch(CGF);
11606 return VDAddr;
11607 }
11608 return UntiedAddr;
11609}
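// The net effect for an allocatable local is a matched pair of runtime calls,
// sketched here with hypothetical operands:
//   void *p = __kmpc_aligned_alloc(gtid, align, size, allocator); // or __kmpc_alloc
//   ...                                                           // variable lifetime
//   __kmpc_free(gtid, p, allocator);                              // via the cleanup above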
11610
11611bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11612 const VarDecl *VD) const {
11613 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11614 if (It == FunctionToUntiedTaskStackMap.end())
11615 return false;
11616 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11617}
11618
11619CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11620 CodeGenModule &CGM, const OMPLoopDirective &S)
11621 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11622 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11623 if (!NeedToPush)
11624 return;
11625 NontemporalDeclsSet &DS =
11626 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11627 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11628 for (const Stmt *Ref : C->private_refs()) {
11629 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11630 const ValueDecl *VD;
11631 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11632 VD = DRE->getDecl();
11633 } else {
11634 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11635 assert((ME->isImplicitCXXThis() ||
11636 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11637 "Expected member of current class.");
11638 VD = ME->getMemberDecl();
11639 }
11640 DS.insert(VD);
11641 }
11642 }
11643}
11644
11645CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11646 if (!NeedToPush)
11647 return;
11648 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11649}
11650
11651CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11652 CodeGenFunction &CGF,
11653 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11654 std::pair<Address, Address>> &LocalVars)
11655 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11656 if (!NeedToPush)
11657 return;
11658 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11659 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11660 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11661}
11662
11663CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11664 if (!NeedToPush)
11665 return;
11666 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11667}
11668
11669bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11670 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11671
11672 return llvm::any_of(
11673 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11674 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11675}
11676
11677void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11678 const OMPExecutableDirective &S,
11679 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11680 const {
11681 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11682 // Vars in target/task regions must be excluded completely.
11683 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11684 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11685 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11686 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11687 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11688 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11689 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11690 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11691 }
11692 }
11693 // Exclude vars in private clauses.
11694 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11695 for (const Expr *Ref : C->varlist()) {
11696 if (!Ref->getType()->isScalarType())
11697 continue;
11698 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11699 if (!DRE)
11700 continue;
11701 NeedToCheckForLPCs.insert(DRE->getDecl());
11702 }
11703 }
11704 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11705 for (const Expr *Ref : C->varlist()) {
11706 if (!Ref->getType()->isScalarType())
11707 continue;
11708 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11709 if (!DRE)
11710 continue;
11711 NeedToCheckForLPCs.insert(DRE->getDecl());
11712 }
11713 }
11714 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11715 for (const Expr *Ref : C->varlist()) {
11716 if (!Ref->getType()->isScalarType())
11717 continue;
11718 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11719 if (!DRE)
11720 continue;
11721 NeedToCheckForLPCs.insert(DRE->getDecl());
11722 }
11723 }
11724 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11725 for (const Expr *Ref : C->varlist()) {
11726 if (!Ref->getType()->isScalarType())
11727 continue;
11728 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11729 if (!DRE)
11730 continue;
11731 NeedToCheckForLPCs.insert(DRE->getDecl());
11732 }
11733 }
11734 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11735 for (const Expr *Ref : C->varlist()) {
11736 if (!Ref->getType()->isScalarType())
11737 continue;
11738 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11739 if (!DRE)
11740 continue;
11741 NeedToCheckForLPCs.insert(DRE->getDecl());
11742 }
11743 }
11744 for (const Decl *VD : NeedToCheckForLPCs) {
11745 for (const LastprivateConditionalData &Data :
11746 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11747 if (Data.DeclToUniqueName.count(VD) > 0) {
11748 if (!Data.Disabled)
11749 NeedToAddForLPCsAsDisabled.insert(VD);
11750 break;
11751 }
11752 }
11753 }
11754}
11755
11756CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11757 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11758 : CGM(CGF.CGM),
11759 Action((CGM.getLangOpts().OpenMP >= 50 &&
11760 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11761 [](const OMPLastprivateClause *C) {
11762 return C->getKind() ==
11763 OMPC_LASTPRIVATE_conditional;
11764 }))
11765 ? ActionToDo::PushAsLastprivateConditional
11766 : ActionToDo::DoNotPush) {
11767 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11768 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11769 return;
11770 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11771 "Expected a push action.");
11773 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11774 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11775 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11776 continue;
11777
11778 for (const Expr *Ref : C->varlist()) {
11779 Data.DeclToUniqueName.insert(std::make_pair(
11780 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11781 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11782 }
11783 }
11784 Data.IVLVal = IVLVal;
11785 Data.Fn = CGF.CurFn;
11786}
11787
11788CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11789 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11790 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11791 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11792 if (CGM.getLangOpts().OpenMP < 50)
11793 return;
11794 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11795 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11796 if (!NeedToAddForLPCsAsDisabled.empty()) {
11797 Action = ActionToDo::DisableLastprivateConditional;
11798 LastprivateConditionalData &Data =
11799 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11800 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11801 Data.DeclToUniqueName.try_emplace(VD);
11802 Data.Fn = CGF.CurFn;
11803 Data.Disabled = true;
11804 }
11805}
11806
11807CGOpenMPRuntime::LastprivateConditionalRAII
11808CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11809 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11810 return LastprivateConditionalRAII(CGF, S);
11811}
11812
11813CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11814 if (CGM.getLangOpts().OpenMP < 50)
11815 return;
11816 if (Action == ActionToDo::DisableLastprivateConditional) {
11817 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11818 "Expected list of disabled private vars.");
11819 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11820 }
11821 if (Action == ActionToDo::PushAsLastprivateConditional) {
11822 assert(
11823 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11824 "Expected list of lastprivate conditional vars.");
11825 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11826 }
11827}
11828
11829Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11830 const VarDecl *VD) {
11831 ASTContext &C = CGM.getContext();
11832 auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
11833 QualType NewType;
11834 const FieldDecl *VDField;
11835 const FieldDecl *FiredField;
11836 LValue BaseLVal;
11837 auto VI = I->getSecond().find(VD);
11838 if (VI == I->getSecond().end()) {
11839 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
11840 RD->startDefinition();
11841 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11842 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11843 RD->completeDefinition();
11844 NewType = C.getCanonicalTagType(RD);
11845 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11846 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
11847 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11848 } else {
11849 NewType = std::get<0>(VI->getSecond());
11850 VDField = std::get<1>(VI->getSecond());
11851 FiredField = std::get<2>(VI->getSecond());
11852 BaseLVal = std::get<3>(VI->getSecond());
11853 }
11854 LValue FiredLVal =
11855 CGF.EmitLValueForField(BaseLVal, FiredField);
11856 CGF.EmitStoreOfScalar(
11857 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
11858 FiredLVal);
11859 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
11860}
11861
11862namespace {
11863/// Checks if the lastprivate conditional variable is referenced in LHS.
11864class LastprivateConditionalRefChecker final
11865 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11866 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11867 const Expr *FoundE = nullptr;
11868 const Decl *FoundD = nullptr;
11869 StringRef UniqueDeclName;
11870 LValue IVLVal;
11871 llvm::Function *FoundFn = nullptr;
11872 SourceLocation Loc;
11873
11874public:
11875 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11876 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11877 llvm::reverse(LPM)) {
11878 auto It = D.DeclToUniqueName.find(E->getDecl());
11879 if (It == D.DeclToUniqueName.end())
11880 continue;
11881 if (D.Disabled)
11882 return false;
11883 FoundE = E;
11884 FoundD = E->getDecl()->getCanonicalDecl();
11885 UniqueDeclName = It->second;
11886 IVLVal = D.IVLVal;
11887 FoundFn = D.Fn;
11888 break;
11889 }
11890 return FoundE == E;
11891 }
11892 bool VisitMemberExpr(const MemberExpr *E) {
11893 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
11894 return false;
11895 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11896 llvm::reverse(LPM)) {
11897 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11898 if (It == D.DeclToUniqueName.end())
11899 continue;
11900 if (D.Disabled)
11901 return false;
11902 FoundE = E;
11903 FoundD = E->getMemberDecl()->getCanonicalDecl();
11904 UniqueDeclName = It->second;
11905 IVLVal = D.IVLVal;
11906 FoundFn = D.Fn;
11907 break;
11908 }
11909 return FoundE == E;
11910 }
11911 bool VisitStmt(const Stmt *S) {
11912 for (const Stmt *Child : S->children()) {
11913 if (!Child)
11914 continue;
11915 if (const auto *E = dyn_cast<Expr>(Child))
11916 if (!E->isGLValue())
11917 continue;
11918 if (Visit(Child))
11919 return true;
11920 }
11921 return false;
11922 }
11923 explicit LastprivateConditionalRefChecker(
11924 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11925 : LPM(LPM) {}
11926 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11927 getFoundData() const {
11928 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11929 }
11930};
11931} // namespace
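// Usage sketch (schematic): for an assignment 'a = b + 1;' where 'a' is
// tracked in LastprivateConditionalStack, Checker.Visit(<LHS>) returns true
// and getFoundData() yields the matched expression, its canonical decl, the
// unique global name, the loop IV lvalue and the owning function.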
11932
11933void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11934 LValue IVLVal,
11935 StringRef UniqueDeclName,
11936 LValue LVal,
11937 SourceLocation Loc) {
11938 // Last updated loop counter for the lastprivate conditional var.
11939 // int<xx> last_iv = 0;
11940 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11941 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11942 LLIVTy, getName({UniqueDeclName, "iv"}));
11943 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11944 IVLVal.getAlignment().getAsAlign());
11945 LValue LastIVLVal =
11946 CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());
11947
11948 // Last value of the lastprivate conditional.
11949 // decltype(priv_a) last_a;
11950 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11951 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
11952 cast<llvm::GlobalVariable>(Last)->setAlignment(
11953 LVal.getAlignment().getAsAlign());
11954 LValue LastLVal =
11955 CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11956
11957 // Global loop counter. Required to handle inner parallel-for regions.
11958 // iv
11959 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
11960
11961 // #pragma omp critical(a)
11962 // if (last_iv <= iv) {
11963 // last_iv = iv;
11964 // last_a = priv_a;
11965 // }
11966 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11967 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11968 Action.Enter(CGF);
11969 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
11970 // If the loop counter has advanced (last_iv <= iv), the variable was
11971 // updated; store the new value in the global var.
11972 llvm::Value *CmpRes;
11973 if (IVLVal.getType()->isSignedIntegerType()) {
11974 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11975 } else {
11976 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11977 "Loop iteration variable must be integer.");
11978 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11979 }
11980 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11981 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11982 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11983 // {
11984 CGF.EmitBlock(ThenBB);
11985
11986 // last_iv = iv;
11987 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11988
11989 // last_a = priv_a;
11990 switch (CGF.getEvaluationKind(LVal.getType())) {
11991 case TEK_Scalar: {
11992 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
11993 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11994 break;
11995 }
11996 case TEK_Complex: {
11997 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
11998 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11999 break;
12000 }
12001 case TEK_Aggregate:
12002 llvm_unreachable(
12003 "Aggregates are not supported in lastprivate conditional.");
12004 }
12005 // }
12006 CGF.EmitBranch(ExitBB);
12007 // There is no need to emit line number for unconditional branch.
12008 (void)ApplyDebugLocation::CreateEmpty(CGF);
12009 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12010 };
12011
12012 if (CGM.getLangOpts().OpenMPSimd) {
12013 // Do not guard with a critical region: no parallel region can be emitted in SIMD-only mode.
12014 RegionCodeGenTy ThenRCG(CodeGen);
12015 ThenRCG(CGF);
12016 } else {
12017 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12018 }
12019}
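// Overall shape of the update emitted above (schematic; the globals are the
// internal variables created via getOrCreateInternalVariable):
//
//   static int<xx> last_iv_a;        // '<unique name>.iv'
//   static decltype(priv_a) last_a;  // '<unique name>'
//   #pragma omp critical(<unique name>)   // skipped in OpenMPSimd mode
//   if (last_iv_a <= iv) { last_iv_a = iv; last_a = priv_a; }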
12020
12021void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12022 const Expr *LHS) {
12023 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12024 return;
12025 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12026 if (!Checker.Visit(LHS))
12027 return;
12028 const Expr *FoundE;
12029 const Decl *FoundD;
12030 StringRef UniqueDeclName;
12031 LValue IVLVal;
12032 llvm::Function *FoundFn;
12033 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12034 Checker.getFoundData();
12035 if (FoundFn != CGF.CurFn) {
12036 // Special codegen for inner parallel regions.
12037 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12038 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12039 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12040 "Lastprivate conditional is not found in outer region.");
12041 QualType StructTy = std::get<0>(It->getSecond());
12042 const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
12043 LValue PrivLVal = CGF.EmitLValue(FoundE);
12044 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12045 PrivLVal.getAddress(),
12046 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12047 CGF.ConvertTypeForMem(StructTy));
12048 LValue BaseLVal =
12049 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12050 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12051 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12052 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12053 FiredLVal, llvm::AtomicOrdering::Unordered,
12054 /*IsVolatile=*/true, /*isInit=*/false);
12055 return;
12056 }
12057
12058 // Private address of the lastprivate conditional in the current context.
12059 // priv_a
12060 LValue LVal = CGF.EmitLValue(FoundE);
12061 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12062 FoundE->getExprLoc());
12063}
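// Illustrative trigger for the inner-region branch above (hypothetical user
// code): the store to 'a' happens in the function outlined for a nested
// region, so only the Fired flag of the outer struct is set here; the value
// itself is merged later by checkAndEmitSharedLastprivateConditional():
//
//   #pragma omp for lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i) {
//     #pragma omp parallel
//     if (cond(i)) a = i;  // runs in the outlined inner function
//   }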
12064
12065void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12066 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12067 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12068 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12069 return;
12070 auto Range = llvm::reverse(LastprivateConditionalStack);
12071 auto It = llvm::find_if(
12072 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12073 if (It == Range.end() || It->Fn != CGF.CurFn)
12074 return;
12075 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12076 assert(LPCI != LastprivateConditionalToTypes.end() &&
12077 "Lastprivates must be registered already.");
12079 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12080 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12081 for (const auto &Pair : It->DeclToUniqueName) {
12082 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12083 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12084 continue;
12085 auto I = LPCI->getSecond().find(Pair.first);
12086 assert(I != LPCI->getSecond().end() &&
12087 "Lastprivate must be rehistered already.");
12088 // bool Cmp = priv_a.Fired != 0;
12089 LValue BaseLVal = std::get<3>(I->getSecond());
12090 LValue FiredLVal =
12091 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12092 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12093 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12094 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12095 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12096 // if (Cmp) {
12097 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12098 CGF.EmitBlock(ThenBB);
12099 Address Addr = CGF.GetAddrOfLocalVar(VD);
12100 LValue LVal;
12101 if (VD->getType()->isReferenceType())
12102 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12103 AlignmentSource::Decl);
12104 else
12105 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12106 AlignmentSource::Decl);
12107 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12108 D.getBeginLoc());
12109 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12110 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
12111 // }
12112 }
12113}
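// Net effect per tracked variable (schematic): after the body of the
// construct, the loop above emits
//
//   if (priv_a.Fired != 0)
//     <update last_a/last_iv_a via emitLastprivateConditionalUpdate()>;
//
// so writes signalled from inner regions still reach the shared copy.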
12114
12115void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12116 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12117 SourceLocation Loc) {
12118 if (CGF.getLangOpts().OpenMP < 50)
12119 return;
12120 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12121 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12122 "Unknown lastprivate conditional variable.");
12123 StringRef UniqueName = It->second;
12124 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12125 // The variable was not updated in the region - exit.
12126 if (!GV)
12127 return;
12128 LValue LPLVal = CGF.MakeRawAddrLValue(
12129 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
12130 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12131 CGF.EmitStoreOfScalar(Res, PrivLVal);
12132}
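// End-to-end behavior (illustrative): given
//
//   #pragma omp parallel for lastprivate(conditional: x)
//   for (int i = 0; i < n; ++i)
//     if (cond(i)) x = f(i);
//
// the final update above copies the internal global holding the last 'x'
// back into the original 'x', so 'x' ends up with the value stored by the
// highest iteration whose assignment actually executed.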
12133
12134llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12135 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12136 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12137 const RegionCodeGenTy &CodeGen) {
12138 llvm_unreachable("Not supported in SIMD-only mode");
12139}
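// In SIMD-only mode (-fopenmp-simd) only 'simd' semantics are honored, so
// CodeGen never requests outlining or runtime calls for 'parallel', 'task',
// 'target' and similar constructs; e.g. '#pragma omp parallel for simd' is
// treated as plain '#pragma omp simd'. Reaching any of the stubs below would
// therefore indicate a caller-side bug.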
12140
12141llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12142 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12143 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
12144 const RegionCodeGenTy &CodeGen) {
12145 llvm_unreachable("Not supported in SIMD-only mode");
12146}
12147
12148llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12149 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12150 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12151 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12152 bool Tied, unsigned &NumberOfParts) {
12153 llvm_unreachable("Not supported in SIMD-only mode");
12154}
12155
12156void CGOpenMPSIMDRuntime::emitParallelCall(
12157 CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
12158 ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
12159 llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
12160 OpenMPSeverityClauseKind Severity, const Expr *Message) {
12161 llvm_unreachable("Not supported in SIMD-only mode");
12162}
12163
12164void CGOpenMPSIMDRuntime::emitCriticalRegion(
12165 CodeGenFunction &CGF, StringRef CriticalName,
12166 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12167 const Expr *Hint) {
12168 llvm_unreachable("Not supported in SIMD-only mode");
12169}
12170
12171void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12172 const RegionCodeGenTy &MasterOpGen,
12173 SourceLocation Loc) {
12174 llvm_unreachable("Not supported in SIMD-only mode");
12175}
12176
12177void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12178 const RegionCodeGenTy &MasterOpGen,
12179 SourceLocation Loc,
12180 const Expr *Filter) {
12181 llvm_unreachable("Not supported in SIMD-only mode");
12182}
12183
12184void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12185 SourceLocation Loc) {
12186 llvm_unreachable("Not supported in SIMD-only mode");
12187}
12188
12189void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12190 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12191 SourceLocation Loc) {
12192 llvm_unreachable("Not supported in SIMD-only mode");
12193}
12194
12195void CGOpenMPSIMDRuntime::emitSingleRegion(
12196 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12197 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12198 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12199 ArrayRef<const Expr *> AssignmentOps) {
12200 llvm_unreachable("Not supported in SIMD-only mode");
12201}
12202
12203void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12204 const RegionCodeGenTy &OrderedOpGen,
12205 SourceLocation Loc,
12206 bool IsThreads) {
12207 llvm_unreachable("Not supported in SIMD-only mode");
12208}
12209
12210void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12211 SourceLocation Loc,
12212 OpenMPDirectiveKind Kind,
12213 bool EmitChecks,
12214 bool ForceSimpleCall) {
12215 llvm_unreachable("Not supported in SIMD-only mode");
12216}
12217
12218void CGOpenMPSIMDRuntime::emitForDispatchInit(
12219 CodeGenFunction &CGF, SourceLocation Loc,
12220 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12221 bool Ordered, const DispatchRTInput &DispatchValues) {
12222 llvm_unreachable("Not supported in SIMD-only mode");
12223}
12224
12225void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
12226 SourceLocation Loc) {
12227 llvm_unreachable("Not supported in SIMD-only mode");
12228}
12229
12230void CGOpenMPSIMDRuntime::emitForStaticInit(
12231 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12232 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12233 llvm_unreachable("Not supported in SIMD-only mode");
12234}
12235
12236void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12237 CodeGenFunction &CGF, SourceLocation Loc,
12238 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12239 llvm_unreachable("Not supported in SIMD-only mode");
12240}
12241
12242void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12243 SourceLocation Loc,
12244 unsigned IVSize,
12245 bool IVSigned) {
12246 llvm_unreachable("Not supported in SIMD-only mode");
12247}
12248
12249void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12250 SourceLocation Loc,
12251 OpenMPDirectiveKind DKind) {
12252 llvm_unreachable("Not supported in SIMD-only mode");
12253}
12254
12255llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12256 SourceLocation Loc,
12257 unsigned IVSize, bool IVSigned,
12258 Address IL, Address LB,
12259 Address UB, Address ST) {
12260 llvm_unreachable("Not supported in SIMD-only mode");
12261}
12262
12263void CGOpenMPSIMDRuntime::emitNumThreadsClause(
12264 CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
12265 OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
12266 const Expr *Message) {
12267 llvm_unreachable("Not supported in SIMD-only mode");
12268}
12269
12270void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12271 ProcBindKind ProcBind,
12272 SourceLocation Loc) {
12273 llvm_unreachable("Not supported in SIMD-only mode");
12274}
12275
12276Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12277 const VarDecl *VD,
12278 Address VDAddr,
12279 SourceLocation Loc) {
12280 llvm_unreachable("Not supported in SIMD-only mode");
12281}
12282
12283llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12284 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12285 CodeGenFunction *CGF) {
12286 llvm_unreachable("Not supported in SIMD-only mode");
12287}
12288
12289Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12290 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12291 llvm_unreachable("Not supported in SIMD-only mode");
12292}
12293
12294void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12295 ArrayRef<const Expr *> Vars,
12296 SourceLocation Loc,
12297 llvm::AtomicOrdering AO) {
12298 llvm_unreachable("Not supported in SIMD-only mode");
12299}
12300
12301void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12302 const OMPExecutableDirective &D,
12303 llvm::Function *TaskFunction,
12304 QualType SharedsTy, Address Shareds,
12305 const Expr *IfCond,
12306 const OMPTaskDataTy &Data) {
12307 llvm_unreachable("Not supported in SIMD-only mode");
12308}
12309
12310void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12311 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12312 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12313 const Expr *IfCond, const OMPTaskDataTy &Data) {
12314 llvm_unreachable("Not supported in SIMD-only mode");
12315}
12316
12317void CGOpenMPSIMDRuntime::emitReduction(
12318 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12319 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12320 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12321 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12322 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12323 ReductionOps, Options);
12324}
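// Illustrative: this is the reduction path still exercised in SIMD-only
// mode, e.g. for
//
//   #pragma omp simd reduction(+: sum)
//
// Options.SimpleReduction holds here, and the base implementation combines
// each private copy into the original variable inline, with no runtime calls.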
12325
12326llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12327 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12328 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12329 llvm_unreachable("Not supported in SIMD-only mode");
12330}
12331
12332void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12333 SourceLocation Loc,
12334 bool IsWorksharingReduction) {
12335 llvm_unreachable("Not supported in SIMD-only mode");
12336}
12337
12338void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12339 SourceLocation Loc,
12340 ReductionCodeGen &RCG,
12341 unsigned N) {
12342 llvm_unreachable("Not supported in SIMD-only mode");
12343}
12344
12345Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12346 SourceLocation Loc,
12347 llvm::Value *ReductionsPtr,
12348 LValue SharedLVal) {
12349 llvm_unreachable("Not supported in SIMD-only mode");
12350}
12351
12352void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12353 SourceLocation Loc,
12354 const OMPTaskDataTy &Data) {
12355 llvm_unreachable("Not supported in SIMD-only mode");
12356}
12357
12358void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12359 CodeGenFunction &CGF, SourceLocation Loc,
12360 OpenMPDirectiveKind CancelRegion) {
12361 llvm_unreachable("Not supported in SIMD-only mode");
12362}
12363
12364void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12365 SourceLocation Loc, const Expr *IfCond,
12366 OpenMPDirectiveKind CancelRegion) {
12367 llvm_unreachable("Not supported in SIMD-only mode");
12368}
12369
12370void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12371 const OMPExecutableDirective &D, StringRef ParentName,
12372 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12373 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12374 llvm_unreachable("Not supported in SIMD-only mode");
12375}
12376
12377void CGOpenMPSIMDRuntime::emitTargetCall(
12378 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12379 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12380 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12381 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12382 const OMPLoopDirective &D)>
12383 SizeEmitter) {
12384 llvm_unreachable("Not supported in SIMD-only mode");
12385}
12386
12387bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12388 llvm_unreachable("Not supported in SIMD-only mode");
12389}
12390
12391bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12392 llvm_unreachable("Not supported in SIMD-only mode");
12393}
12394
12395bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12396 return false;
12397}
12398
12399void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12400 const OMPExecutableDirective &D,
12401 SourceLocation Loc,
12402 llvm::Function *OutlinedFn,
12403 ArrayRef<llvm::Value *> CapturedVars) {
12404 llvm_unreachable("Not supported in SIMD-only mode");
12405}
12406
12407void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12408 const Expr *NumTeams,
12409 const Expr *ThreadLimit,
12410 SourceLocation Loc) {
12411 llvm_unreachable("Not supported in SIMD-only mode");
12412}
12413
12414void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12415 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12416 const Expr *Device, const RegionCodeGenTy &CodeGen,
12417 CGOpenMPRuntime::TargetDataInfo &Info) {
12418 llvm_unreachable("Not supported in SIMD-only mode");
12419}
12420
12421void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12422 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12423 const Expr *Device) {
12424 llvm_unreachable("Not supported in SIMD-only mode");
12425}
12426
12427void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12428 const OMPLoopDirective &D,
12429 ArrayRef<Expr *> NumIterations) {
12430 llvm_unreachable("Not supported in SIMD-only mode");
12431}
12432
12433void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12434 const OMPDependClause *C) {
12435 llvm_unreachable("Not supported in SIMD-only mode");
12436}
12437
12438void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12439 const OMPDoacrossClause *C) {
12440 llvm_unreachable("Not supported in SIMD-only mode");
12441}
12442
12443const VarDecl *
12444CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12445 const VarDecl *NativeParam) const {
12446 llvm_unreachable("Not supported in SIMD-only mode");
12447}
12448
12449Address
12450CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12451 const VarDecl *NativeParam,
12452 const VarDecl *TargetParam) const {
12453 llvm_unreachable("Not supported in SIMD-only mode");
12454}
#define V(N, I)
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps, SourceLocation Loc)
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, SourceLocation Loc, SmallString< 128 > &Buffer)
static void emitOffloadingArraysAndArgs(CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous=false, bool ForEndCall=false)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, Address OriginalBaseAddress, llvm::Value *Addr)
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static void emitClauseForBareTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &Values)
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS.
static void emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, llvm::PointerUnion< unsigned *, LValue * > Pos, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPTaskDataTy &Data, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables.
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
static llvm::Value * getAllocatorVal(CodeGenFunction &CGF, const Expr *Allocator)
Return allocator value from expression, or return a null allocator (default when no allocator specifi...
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static bool isAllocatableDecl(const VarDecl *VD)
static llvm::Value * getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD)
Return the alignment from an allocate directive if present.
static void emitTargetCallKernelLaunch(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter, CodeGenFunction &CGF, CodeGenModule &CGM)
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind convertCaptureClause(const VarDecl *VD)
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
static std::pair< llvm::Value *, llvm::Value * > getPointerAndSize(CodeGenFunction &CGF, const Expr *E)
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, const Expr **E, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondVal)
Check for a num threads constant value (stored in DefaultVal), or expression (stored in E).
static llvm::Value * emitDeviceID(llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, CodeGenFunction &CGF)
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, ArrayRef< PrivateDataTy > Privates)
Checks if destructor function is required to be generated.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, SourceLocation BeginLoc, llvm::StringRef ParentName="")
static void genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet< CanonicalDeclPtr< const Decl > > &SkippedVarSet=llvm::DenseSet< CanonicalDeclPtr< const Decl > >())
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs)
Emit a string constant containing the names of the values mapped to the offloading runtime library.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, QualType &FlagsTy)
Builds kmp_depend_info, if it is not built yet, and builds flags type.
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind convertDeviceClause(const VarDecl *VD)
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3....
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, const T *C, llvm::Value *ULoc, llvm::Value *ThreadID)
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K)
Translates internal dependency kind into the runtime kind.
static void emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, bool RequiresOuterTask, const CapturedStmt &CS, bool OffloadingMandatory, CodeGenFunction &CGF)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static std::string generateUniqueName(CodeGenModule &CGM, llvm::StringRef Prefix, const Expr *Ref)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable.
static llvm::Value * emitDynCGGroupMem(const OMPExecutableDirective &D, CodeGenFunction &CGF)
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
static ValueDecl * getDeclFromThisExpr(const Expr *E)
static void genMapInfoForCaptures(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl< llvm::Value * > &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, llvm::DenseSet< CanonicalDeclPtr< const Decl > > &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
Expr::Classification Cl
TokenType getType() const
Returns the token's type, e.g.
FormatToken * Next
The next token in the unwrapped line.
#define X(type, name)
Definition Value.h:97
#define SM(sm)
This file defines OpenMP AST classes for clauses.
Defines some OpenMP-specific enums and functions.
Defines the SourceManager interface.
This file defines OpenMP AST classes for executable directives and clauses.
__DEVICE__ int max(int __a, int __b)
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:188
SourceManager & getSourceManager()
Definition ASTContext.h:798
const ConstantArrayType * getAsConstantArrayType(QualType T) const
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D,...
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType VoidPtrTy
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, const Expr *SizeExpr, ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type.
const LangOptions & getLangOpts() const
Definition ASTContext.h:891
CanQualType BoolTy
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth,...
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
int64_t toBits(CharUnits CharSize) const
Convert a size in characters to a size in bits.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
const VariableArrayType * getAsVariableArrayType(QualType T) const
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
unsigned getTypeAlign(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in bits.
CharUnits getSize() const
getSize - Get the record size in characters.
uint64_t getFieldOffset(unsigned FieldNo) const
getFieldOffset - Get the offset of the given field index, in bits.
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition Expr.cpp:5265
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition TypeBase.h:3720
Attr - This represents one attribute.
Definition Attr.h:44
Represents a C++ constructor within a class.
Definition DeclCXX.h:2604
Represents a C++ destructor within a class.
Definition DeclCXX.h:2869
const CXXRecordDecl * getParent() const
Return the parent of this method declaration, which is the class in which this method is defined.
Definition DeclCXX.h:2255
QualType getFunctionObjectParameterType() const
Definition DeclCXX.h:2279
base_class_range bases()
Definition DeclCXX.h:608
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition DeclCXX.h:1018
void getCaptureFields(llvm::DenseMap< const ValueDecl *, FieldDecl * > &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition DeclCXX.cpp:1784
unsigned getNumBases() const
Retrieves the number of base classes of this class.
Definition DeclCXX.h:602
base_class_range vbases()
Definition DeclCXX.h:625
capture_const_range captures() const
Definition DeclCXX.h:1097
ctor_range ctors() const
Definition DeclCXX.h:670
CXXDestructorDecl * getDestructor() const
Returns the destructor decl for this class.
Definition DeclCXX.cpp:2121
CanProxy< U > castAs() const
A wrapper class around a pointer that always points to its canonical declaration.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition Stmt.h:3899
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition Stmt.h:3933
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition Stmt.cpp:1349
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition Stmt.h:3939
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition Stmt.h:3927
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition Stmt.h:3930
This captures a statement into a function.
Definition Stmt.h:3886
const Capture * const_capture_iterator
Definition Stmt.h:4020
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition Stmt.h:4037
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition Stmt.h:4007
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition Stmt.h:3990
bool capturesVariable(const VarDecl *Var) const
True if this variable has been captured.
Definition Stmt.cpp:1475
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition Stmt.h:4032
capture_range captures()
Definition Stmt.h:4024
CharUnits - This is an opaque type for sizes expressed in character units.
Definition CharUnits.h:38
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition CharUnits.h:122
llvm::Align getAsAlign() const
getAsAlign - Returns Quantity as a valid llvm::Align, Beware llvm::Align assumes power of two 8-bit b...
Definition CharUnits.h:189
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition CharUnits.h:214
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition CharUnits.h:63
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition CharUnits.h:201
Like RawAddress, an abstract representation of an aligned address, but the pointer contained in this ...
Definition Address.h:128
static Address invalid()
Definition Address.h:176
llvm::Value * emitRawPointer(CodeGenFunction &CGF) const
Return the pointer contained in this class after authenticating it and adding offset to it if necessa...
Definition Address.h:253
CharUnits getAlignment() const
Definition Address.h:194
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:209
Address withPointer(llvm::Value *NewPointer, KnownNonNull_t IsKnownNonNull) const
Return address with different pointer, but same element type and alignment.
Definition Address.h:261
Address withElementType(llvm::Type *ElemTy) const
Return address with different element type, but same pointer and alignment.
Definition Address.h:276
bool isValid() const
Definition Address.h:177
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition Address.h:204
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition CGBuilder.h:140
Address CreateGEP(CodeGenFunction &CGF, Address Addr, llvm::Value *Index, const llvm::Twine &Name="")
Definition CGBuilder.h:296
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, llvm::Type *ElementTy, const llvm::Twine &Name="")
Definition CGBuilder.h:207
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ... produce name = getelementptr inbounds addr, i64 0, i64 index where i64 is a...
Definition CGBuilder.h:245
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition CGBuilder.h:112
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition CGBuilder.h:369
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ... produce name = getelementptr inbounds addr, i64 index where i64 is actually the t...
Definition CGBuilder.h:282
CGFunctionInfo - Class to encapsulate the information about a function definition.
static LastprivateConditionalRAII disable(CodeGenFunction &CGF, const OMPExecutableDirective &S)
NontemporalDeclsRAII(CodeGenModule &CGM, const OMPLoopDirective &S)
Struct that keeps all the relevant information that should be kept throughout a 'target data' region.
llvm::DenseMap< const ValueDecl *, llvm::Value * > CaptureDeviceAddrMap
Map between the a declaration of a capture and the corresponding new llvm address where the runtime r...
UntiedTaskLocalDeclsRAII(CodeGenFunction &CGF, const llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > &LocalVars)
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the task directive.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata.
const Expr * getNumTeamsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, int32_t &MaxTeamsVal)
Emit the number of teams for a target directive.
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
llvm::SmallDenseSet< CanonicalDeclPtr< const Decl > > NontemporalDeclsSet
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::string getOutlinedHelperName(StringRef Name) const
Get the function name of an outlined region.
bool HasEmittedDeclareTargetRegion
Flag for keeping track of weather a device routine has been emitted.
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the privatue copy of the reduction item specified by the SharedLVal...
virtual void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc)
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args={}) const
Emits Callee function call with arguments Args with location Loc.
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
virtual bool isGPU() const
Returns true if the current target is a GPU.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
virtual void emitDeclareTargetFunction(const FunctionDecl *FD, llvm::GlobalValue *GV)
Emit code for handling declare target functions in the runtime.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of weather a requires unified_shared_memory directive is present.
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0, bool EmitLoc=false)
Emits object of ident_t type with info for source location.
bool isLocalVarInUntiedTask(CodeGenFunction &CGF, const VarDecl *VD) const
Returns true if the variable is a local variable in untied task.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit a code for initialization of threadprivate variable.
virtual ConstantAddress getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
llvm::Function * getOrCreateUserDefinedMapperFunc(const OMPDeclareMapperDecl *D)
Get the function for the specified user-defined mapper.
OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
QualType KmpTaskTQTy
Type typedef struct kmp_task { void * shareds; /‍**< pointer to block of pointers to shared vars ‍/ k...
llvm::OpenMPIRBuilder OMPBuilder
An OpenMP-IR-Builder instance.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations)
Emit initialization for doacross loop nesting support.
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the target data mapping code associated with D.
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument basing on the address of the target-specific parameter.
void emitUsesAllocatorsFini(CodeGenFunction &CGF, const Expr *Allocator)
Destroys user defined allocators specified in the uses_allocators clause.
QualType KmpTaskAffinityInfoTy
Type typedef struct kmp_task_affinity_info { kmp_intptr_t base_addr; size_t len; struct { bool flag1 ...
void emitPrivateReduction(CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates, const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps)
Emits code for private variable reduction.
llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
SmallVector< llvm::Value *, 4 > emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, const OMPTaskDataTy::DependData &Data)
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
virtual llvm::Value * emitMessageClause(CodeGenFunction &CGF, const Expr *Message)
llvm::DenseMap< llvm::Function *, llvm::DenseMap< CanonicalDeclPtr< const Decl >, std::tuple< QualType, const FieldDecl *, const FieldDecl *, LValue > > > LastprivateConditionalToTypes
Maps local variables marked as lastprivate conditional to their internal types.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32global_tid, kmp_int32 num_teams,...
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
void emitUserDefinedMapper(const OMPDeclareMapperDecl *D, CodeGenFunction *CGF=nullptr)
Emit the function for the user defined mapper construct.
bool HasEmittedTargetRegion
Flag for keeping track of weather a target region has been emitted.
void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue PosLVal, const OMPTaskDataTy::DependData &Data, Address DependenciesArray)
std::string getReductionFuncName(StringRef Name) const
Get the function name of a reduction function.
virtual void processRequiresDirective(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing.
llvm::DenseSet< CanonicalDeclPtr< const Decl > > AlreadyEmittedTargetDecls
List of the emitted declarations.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
void emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, OpenMPDependClauseKind NewDepKind, SourceLocation Loc)
Updates the dependency kind in the specified depobj object.
virtual void emitLastprivateConditionalFinalUpdate(CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, SourceLocation Loc)
Gets the address of the global copy used for lastprivate conditional update, if any.
llvm::MapVector< CanonicalDeclPtr< const VarDecl >, std::pair< Address, Address > > UntiedLocalVarsAddressesMap
virtual void emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, Expr *ME, bool IsFatal)
Emit __kmpc_error call for error directive extern void __kmpc_error(ident_t *loc, int severity,...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
void computeMinAndMaxThreadsAndTeams(const OMPExecutableDirective &D, CodeGenFunction &CGF, llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
Helper to determine the min/max number of threads/teams for D.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO)
Emit flush of the variables specified in 'omp flush' directive.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data)
Emit code for 'taskwait' directive.
virtual void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32global_tid, int proc_bind) to generate...
void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, StringRef UniqueDeclName, LValue LVal, SourceLocation Loc)
Emit update for lastprivate conditional data.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
Emit task region for the taskloop directive.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barier is going to ...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Emit task region for the task directive.
llvm::Value * emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Return the trip count of loops associated with constructs / 'target teams distribute' and 'teams dist...
llvm::StringMap< llvm::AssertingVH< llvm::GlobalVariable >, llvm::BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
llvm::SmallVector< UntiedLocalVarsAddressesMap, 4 > UntiedLocalVarsStack
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop.
virtual void emitThreadLimitClause(CodeGenFunction &CGF, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_set_thread_limit(ident_t *loc, kmp_int32global_tid, kmp_int32 thread_limit)...
void emitIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
Address emitDepobjDependClause(CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, SourceLocation Loc)
Emits list of dependecies based on the provided data (array of dependence/expression pairs) for depob...
virtual llvm::Value * emitSeverityClause(OpenMPSeverityClauseKind Severity)
bool isNontemporalDecl(const ValueDecl *VD) const
Checks if the VD variable is marked as nontemporal declaration in current context.
virtual llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32global_tid, kmp_int32 num_threads) ...
const Expr * getNumThreadsExprForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, bool UpperBoundOnly, llvm::Value **CondExpr=nullptr, const Expr **ThreadLimitExpr=nullptr)
Check for a number of threads upper bound constant value (stored in UpperBound), or expression (retur...
llvm::SmallVector< LastprivateConditionalData, 4 > LastprivateConditionalStack
Stack for list of addresses of declarations in current context marked as lastprivate conditional.
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
llvm::AtomicOrdering getDefaultMemoryOrdering() const
Gets default memory ordering as specified in requires directive.
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
llvm::DenseMap< llvm::Function *, unsigned > FunctionToUntiedTaskStackMap
Maps function to the position of the untied task locals stack.
void emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Emits the code to destroy the dependency object provided in depobj directive.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::ArrayType * KmpCriticalNameTy
Type kmp_critical_name, originally defined as typedef kmp_int32 kmp_critical_name[8];.
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
llvm::DenseMap< const OMPDeclareMapperDecl *, llvm::Function * > UDMMap
Map from the user-defined mapper declaration to its corresponding functions.
virtual void checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const Expr *LHS)
Checks if the provided LVal is lastprivate conditional and emits the code to update the value of the ...
std::pair< llvm::Value *, LValue > getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, SourceLocation Loc)
Returns the number of the elements and the address of the depobj dependency array.
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
void emitUsesAllocatorsInit(CodeGenFunction &CGF, const Expr *Allocator, const Expr *AllocatorTraits)
Initializes user defined allocators specified in the uses_allocators clauses.
llvm::Type * KmpRoutineEntryPtrTy
Type typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *);.
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::OpenMPIRBuilder & getOMPBuilder()
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value * > Args={}) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit an expression that denotes the number of threads a target region shall use.
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
virtual void checkAndEmitSharedLastprivateConditional(CodeGenFunction &CGF, const OMPExecutableDirective &D, const llvm::DenseSet< CanonicalDeclPtr< const VarDecl > > &IgnoredDecls)
Checks if the lastprivate conditional was updated in inner region and writes the value.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
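The flattened layout above, re-expanded; the trailing 'st' field lies past the truncation point and is an assumption based on the usual lower/upper/stride triple:
  struct kmp_dim {  // loop bounds info casted to kmp_int64
    kmp_int64 lo;   // lower
    kmp_int64 up;   // upper
    kmp_int64 st;   // stride (assumed; past the truncation point)
  };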
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided global decl GD is a declare target variable and registers it when emitting cod...
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter)
Emit the target offloading code associated with D.
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
llvm::AtomicOrdering RequiresAtomicOrdering
Atomic ordering from the omp requires directive.
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit code for the reduction clause.
std::pair< llvm::Value *, Address > emitDependClause(CodeGenFunction &CGF, ArrayRef< OMPTaskDataTy::DependData > Dependencies, SourceLocation Loc)
Emits list of dependencies based on the provided data (array of dependence/expression pairs).
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
Address emitLastprivateConditionalInit(CodeGenFunction &CGF, const VarDecl *VD)
Create specialized alloca to handle lastprivate conditionals.
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction)
Emits the following code for reduction clause with task modifier:
virtual void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr)
Emits a masked region.
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
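The truncated typedef above, re-expanded; everything after the 'in' bit-field is an assumption about the runtime's flag layout:
  typedef struct kmp_depend_info {
    kmp_intptr_t base_addr;
    size_t len;
    struct {
      bool in : 1;
      bool out : 1;  // assumed; past the truncation point
    } flags;
  } kmp_depend_info_t;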
llvm::Function * emitReductionFunction(StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before the start of the loop.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before the start of the loop.
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next(ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
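A plausible shape for the 32-bit variant of the entry named above; libomp specializes it per IV width, so the _4 suffix and the parameters past the truncation are assumptions:
  kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 tid,
                                   kmp_int32 *p_lastiter, kmp_int32 *p_lower,
                                   kmp_int32 *p_upper, kmp_int32 *p_stride);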
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc, OpenMPNumThreadsClauseModifier Modifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) ...
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for the initialization of a threadprivate variable.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
llvm::Function * emitTeamsOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond, llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier=OMPC_NUMTHREADS_unknown, OpenMPSeverityClauseKind Severity=OMPC_SEVERITY_fatal, const Expr *Message=nullptr) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options) override
Emit code for the reduction clause.
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc, llvm::AtomicOrdering AO) override
Emit flush of the variables specified in 'omp flush' directive.
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for the 'taskyield' directive.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps) override
Emits a single region.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
llvm::Function * emitParallelOutlinedFunction(CodeGenFunction &CGF, const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument based on the address of the target-specific parameter.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc, bool IsWorksharingReduction) override
Emits the following code for reduction clause with task modifier:
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with the current loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data) override
Emit code for the initialization of the task reduction clause.
void emitProcBindClause(CodeGenFunction &CGF, llvm::omp::ProcBindKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generate...
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams,...
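As a declaration; the brief is cut off after num_teams, so the trailing parameter is an assumption:
  void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_teams,
                             kmp_int32 num_threads /* assumed */);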
void emitForDispatchDeinit(CodeGenFunction &CGF, SourceLocation Loc) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
void emitMaskedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MaskedOpGen, SourceLocation Loc, const Expr *Filter=nullptr) override
Emits a masked region.
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the task directive.
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, llvm::PointerIntPair< const Expr *, 2, OpenMPDeviceClauseModifier > Device, llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) override
Emit the target offloading code associated with D.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr * > NumIterations) override
Emit initialization for doacross loop nesting support.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPTaskDataTy &Data) override
Emit code for 'taskwait' directive.
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, CGOpenMPRuntime::TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non-static scheduled types and when the ordered clause is present on the loop constr...
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emit task region for the taskloop directive.
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
API for captured statement code generation.
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
RAII for correct setting/restoring of CapturedStmtInfo.
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
bool Privatize()
Privatizes local variables previously registered as private.
bool addPrivate(const VarDecl *LocalVD, Address Addr)
Registers LocalVD variable as a private with Addr as the address of the corresponding private variabl...
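A hedged sketch of the privatization scope these two members belong to; the class name OMPPrivateScope and the in-scope CGF, VD and PrivateAddr are assumptions here.
  CodeGenFunction::OMPPrivateScope PrivScope(CGF);
  PrivScope.addPrivate(VD, PrivateAddr); // remap VD to its private copy
  (void)PrivScope.Privatize();           // activate the registered remappings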
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited.
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition CGExpr.cpp:3030
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount, Stmt::Likelihood LH=Stmt::LH_None, const Expr *ConditionalOp=nullptr, const VarDecl *ConditionalDecl=nullptr)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition CGDecl.cpp:2394
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
CGCapturedStmtInfo * CapturedStmtInfo
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
const LangOptions & getLangOpts() const
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition CGDecl.cpp:1482
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup.
Definition CGDecl.cpp:2278
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Load a pointer with type PtrTy stored at address Ptr.
Definition CGExpr.cpp:3039
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
const Decl * CurCodeDecl
CurCodeDecl - This is the inner-most code context, which includes blocks.
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition CGDecl.cpp:2251
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field, bool IsInBounds=true)
Definition CGExpr.cpp:5252
RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty, const Twine &Name="tmp")
CreateDefaultAlignTempAlloca - This creates an alloca with the default ABI alignment of the given L...
Definition CGExpr.cpp:174
void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl< llvm::Value * > &CapturedVars)
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition CGExpr.cpp:242
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition CGExpr.cpp:2336
LValue EmitArraySectionExpr(const ArraySectionExpr *E, bool IsLowerBound=true)
Definition CGExpr.cpp:4835
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition CGExpr.cpp:223
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
LValue MakeRawAddrLValue(llvm::Value *V, QualType T, CharUnits Alignment, AlignmentSource Source=AlignmentSource::Type)
Same as MakeAddrLValue above except that the pointer is known to be unsigned.
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference,...
Definition CGExpr.cpp:5426
void incrementProfileCounter(const Stmt *S, llvm::Value *StepV=nullptr)
Increment the profiler's counter for the given statement by StepV.
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements,...
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
void EmitStoreOfComplex(ComplexPairTy V, LValue dest, bool isInit)
EmitStoreOfComplex - Store a complex number into the specified l-value.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition CGDecl.cpp:2202
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition CGExpr.cpp:2533
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition CGExpr.cpp:3049
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition CGExpr.cpp:293
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind.
llvm::DenseMap< const ValueDecl *, FieldDecl * > LambdaCaptureFields
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::Type * ConvertTypeForMem(QualType T)
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
CodeGenTypes & getTypes() const
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
void EmitOMPTargetTaskBasedDirective(const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, OMPTargetDataInfo &InputInfo)
Address EmitPointerWithAlignment(const Expr *Addr, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitPointerWithAlignment - Given an expression with a pointer type, emit the value and compute our be...
Definition CGExpr.cpp:1515
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block,...
Definition CGStmt.cpp:672
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, const OMPExecutableDirective &D)
RawAddress CreateMemTemp(QualType T, const Twine &Name="tmp", RawAddress *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition CGExpr.cpp:186
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
Definition CGExpr.cpp:2997
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition CGDecl.cpp:203
llvm::Value * EmitCheckedInBoundsGEP(llvm::Type *ElemTy, llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void EmitOMPTargetParallelGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelGenericLoopDirective &S)
Emit device code for the target parallel loop directive.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type,...
static bool IsWrappedCXXThis(const Expr *E)
Check if E is a C++ "this" pointer wrapped in value-preserving casts.
Definition CGExpr.cpp:1573
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit)
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant,...
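Sketch of the usual pattern around this helper; Cond is assumed to be in scope:
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    // The branch is static: emit only the arm selected by CondConstant.
  }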
static void EmitOMPTargetTeamsGenericLoopDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsGenericLoopDirective &S)
Emit device code for the target teams loop directive.
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
LValue EmitLValue(const Expr *E, KnownNonNull_t IsKnownNonNull=NotKnownNonNull)
EmitLValue - Emit code to compute a designator that specifies the location of the expression.
Definition CGExpr.cpp:1631
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition CGDecl.cpp:1807
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition CGStmt.cpp:652
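Illustrative combination of the block helpers listed in this section; the block names, Cond and the zero profile count are placeholders.
  llvm::BasicBlock *ThenBB = CGF.createBasicBlock(".then");
  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cont");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBB, ContBB, /*TrueCount=*/0);
  CGF.EmitBlock(ThenBB);
  // ... emit the guarded statements ...
  CGF.EmitBranch(ContBB);
  CGF.EmitBlock(ContBB);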
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition CGDecl.cpp:2092
LValue MakeNaturalAlignRawAddrLValue(llvm::Value *V, QualType T)
This class organizes the cross-function state that is used while generating LLVM code.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
llvm::Module & getModule() const
DiagnosticsEngine & getDiags() const
const LangOptions & getLangOpts() const
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
ASTContext & getContext() const
const CodeGenOptions & getCodeGenOpts() const
StringRef getMangledName(GlobalDecl GD)
std::optional< CharUnits > getOMPAllocateAlignment(const VarDecl *VD)
Return the alignment specified in an allocate directive, if present.
Definition CGDecl.cpp:2944
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for the given function info.
Definition CGCall.cpp:1701
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition CGCall.cpp:739
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
A specialization of Address that requires the address to be an LLVM Constant.
Definition Address.h:296
static ConstantAddress invalid()
Definition Address.h:304
void pushTerminate()
Push a terminate handler on the stack.
void popTerminate()
Pops a terminate handler off the stack.
Definition CGCleanup.h:639
FunctionArgList - Type for representing both the decl and type of parameters to a function.
Definition CGCall.h:375
LValue - This represents an lvalue reference.
Definition CGValue.h:182
CharUnits getAlignment() const
Definition CGValue.h:343
llvm::Value * getPointer(CodeGenFunction &CGF) const
const Qualifiers & getQuals() const
Definition CGValue.h:338
Address getAddress() const
Definition CGValue.h:361
LValueBaseInfo getBaseInfo() const
Definition CGValue.h:346
QualType getType() const
Definition CGValue.h:291
TBAAAccessInfo getTBAAInfo() const
Definition CGValue.h:335
A basic class for pre- and post-actions in an advanced codegen sequence for an OpenMP region.
virtual void Enter(CodeGenFunction &CGF)
RValue - This trivial value class is used to represent the result of an expression that is evaluated.
Definition CGValue.h:42
static RValue get(llvm::Value *V)
Definition CGValue.h:98
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition CGValue.h:108
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition CGValue.h:71
An abstract representation of an aligned address.
Definition Address.h:42
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition Address.h:77
llvm::Value * getPointer() const
Definition Address.h:66
static RawAddress invalid()
Definition Address.h:61
Class intended to support codegen of all kinds of reduction clauses.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
LValue getOrigLValue(unsigned N) const
Returns LValue for the original reduction item.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const VarDecl * getBaseDecl(unsigned N) const
Returns the base declaration of the reduction item.
QualType getPrivateType(unsigned N) const
Return the type of the private item.
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N)
Emits lvalue for the shared and original reduction item.
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item),...
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Origs, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivateAddr for use instead of the original variable address in normal operations.
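A hedged sketch of driving the ReductionCodeGen API above for a single item N; the argument arrays, CGF and PrivateAddr are assumed to be in scope.
  ReductionCodeGen RCG(Shareds, Origs, Privates, ReductionOps);
  RCG.emitSharedOrigLValue(CGF, N); // materialize shared/original lvalues
  RCG.emitAggregateType(CGF, N);    // size info for variably modified types
  if (RCG.needCleanups(N))
    RCG.emitCleanups(CGF, N, PrivateAddr);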
Class that provides a way to call the simple version of codegen for an OpenMP region, or an advanced one with possibl...
void operator()(CodeGenFunction &CGF) const
void setAction(PrePostActionTy &Action) const
ConstStmtVisitor - This class implements a simple visitor for Stmt subclasses.
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition DeclBase.h:1449
void addDecl(Decl *D)
Add the declaration D into this context.
A reference to a declared variable, function, enum, etc.
Definition Expr.h:1270
ValueDecl * getDecl()
Definition Expr.h:1338
Decl - This represents one declaration (or definition), e.g.
Definition DeclBase.h:86
T * getAttr() const
Definition DeclBase.h:573
bool hasAttrs() const
Definition DeclBase.h:518
ASTContext & getASTContext() const LLVM_READONLY
Definition DeclBase.cpp:524
void addAttr(Attr *A)
virtual Stmt * getBody() const
getBody - If this Decl represents a declaration for a body of code, such as a function or method defi...
Definition DeclBase.h:1087
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition DeclBase.h:559
SourceLocation getLocation() const
Definition DeclBase.h:439
DeclContext * getDeclContext()
Definition DeclBase.h:448
AttrVec & getAttrs()
Definition DeclBase.h:524
bool hasAttr() const
Definition DeclBase.h:577
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition DeclBase.h:978
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Decl.h:830
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition Diagnostic.h:904
This represents one expression.
Definition Expr.h:112
bool isIntegerConstantExpr(const ASTContext &Ctx) const
bool isGLValue() const
Definition Expr.h:287
@ SE_AllowSideEffects
Allow any unmodeled side effect.
Definition Expr.h:674
@ SE_AllowUndefinedBehavior
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition Expr.h:672
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition Expr.cpp:3090
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition Expr.cpp:3085
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
std::optional< llvm::APSInt > getIntegerConstantExpr(const ASTContext &Ctx) const
getIntegerConstantExpr - Return the value if this expression is a valid integer constant expression.
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition Expr.cpp:3665
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition Expr.cpp:273
QualType getType() const
Definition Expr.h:144
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition Expr.cpp:4030
Represents a member of a struct/union/class.
Definition Decl.h:3157
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition Decl.h:3242
const RecordDecl * getParent() const
Returns the parent of this field declaration, which is the struct in which this field is defined.
Definition Decl.h:3393
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, const IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition Decl.cpp:4641
Represents a function declaration or definition.
Definition Decl.h:1999
const ParmVarDecl * getParamDecl(unsigned i) const
Definition Decl.h:2794
QualType getReturnType() const
Definition Decl.h:2842
ArrayRef< ParmVarDecl * > parameters() const
Definition Decl.h:2771
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:3688
FunctionDecl * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition Decl.cpp:3767
FunctionDecl * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
GlobalDecl - represents a global declaration.
Definition GlobalDecl.h:57
const Decl * getDecl() const
Definition GlobalDecl.h:106
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition Decl.cpp:5474
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition Expr.cpp:971
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition Expr.h:3298
ValueDecl * getMemberDecl() const
Retrieve the member declaration to which this expression refers.
Definition Expr.h:3381
Expr * getBase() const
Definition Expr.h:3375
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition Decl.h:300
bool isExternallyVisible() const
Definition Decl.h:432
This represents clause 'affinity' in the '#pragma omp task'-based directives.
ArrayRef< MappableComponent > MappableExprComponentListRef
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
This is a basic class for representing single OpenMP clause.
This represents '#pragma omp declare mapper ...' directive.
Definition DeclOpenMP.h:349
Expr * getMapperVarRef()
Get the variable declared in the mapper.
Definition DeclOpenMP.h:411
This represents '#pragma omp declare reduction ...' directive.
Definition DeclOpenMP.h:239
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition DeclOpenMP.h:300
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition DeclOpenMP.h:311
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition DeclOpenMP.h:288
Expr * getCombinerIn()
Get In variable of the combiner.
Definition DeclOpenMP.h:285
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition DeclOpenMP.h:282
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition DeclOpenMP.h:308
OMPDeclareReductionInitKind getInitializerKind() const
Get initializer kind.
Definition DeclOpenMP.h:303
This represents implicit clause 'depend' for the '#pragma omp task' directive.
This represents 'detach' clause in the '#pragma omp task' directive.
This represents 'device' clause in the '#pragma omp ...' directive.
This represents the 'doacross' clause for the '#pragma omp ordered' directive.
This represents 'if' clause in the '#pragma omp ...' directive.
Expr * getCondition() const
Returns condition.
This represents clause 'in_reduction' in the '#pragma omp task' directives.
OMPIteratorHelperData & getHelper(unsigned I)
Fetches helper data for the specified iteration space.
Definition Expr.cpp:5446
unsigned numOfIterators() const
Returns number of iterator definitions.
Definition ExprOpenMP.h:275
This represents clause 'lastprivate' in the '#pragma omp ...' directives.
This represents the 'message' clause in the '#pragma omp error' and the '#pragma omp parallel' directiv...
Expr * getMessageString() const
Returns message string of the clause.
This represents clause 'nontemporal' in the '#pragma omp ...' directives.
This represents 'nowait' clause in the '#pragma omp ...' directive.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
This represents 'num_threads' clause in the '#pragma omp ...' directive.
This represents 'ordered' clause in the '#pragma omp ...' directive.
This represents clause 'private' in the '#pragma omp ...' directives.
This represents '#pragma omp requires...' directive.
Definition DeclOpenMP.h:479
clauselist_range clauselists()
Definition DeclOpenMP.h:504
This represents the 'severity' clause in the '#pragma omp error' and the '#pragma omp parallel' directi...
OpenMPSeverityClauseKind getSeverityKind() const
Returns kind of the clause.
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
This represents clause 'uses_allocators' in the '#pragma omp target'-based directives.
This represents 'ompx_attribute' clause in a directive that might generate an outlined function.
This represents 'ompx_bare' clause in the '#pragma omp target teams ...' directive.
This represents 'ompx_dyn_cgroup_mem' clause in the '#pragma omp target ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class.
Definition Expr.h:1178
Represents a parameter to a function.
Definition Decl.h:1789
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition TypeBase.h:3328
Represents an unpacked "presumed" location which can be presented to the user.
unsigned getColumn() const
Return the presumed column number of this location.
const char * getFilename() const
Return the presumed filename of this location.
unsigned getLine() const
Return the presumed line number of this location.
A (possibly-)qualified type.
Definition TypeBase.h:937
void addRestrict()
Add the restrict qualifier to this QualType.
Definition TypeBase.h:1172
QualType withRestrict() const
Definition TypeBase.h:1175
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition TypeBase.h:8285
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition TypeBase.h:8325
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition TypeBase.h:8470
QualType getCanonicalType() const
Definition TypeBase.h:8337
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after.
Definition TypeBase.h:1545
Represents a struct/union/class.
Definition Decl.h:4309
field_iterator field_end() const
Definition Decl.h:4515
field_range fields() const
Definition Decl.h:4512
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition Decl.cpp:5170
bool field_empty() const
Definition Decl.h:4520
field_iterator field_begin() const
Definition Decl.cpp:5154
Scope - A scope is a transient data structure that is used while parsing the program.
Definition Scope.h:41
Encodes a location in the source.
static SourceLocation getFromRawEncoding(UIntTy Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
bool isValid() const
Return true if this is a valid SourceLocation object.
UIntTy getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it.
This class handles loading and caching of source files into memory.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.
Stmt - This represents one statement.
Definition Stmt.h:85
child_range children()
Definition Stmt.cpp:295
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:334
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top,...
Definition Stmt.cpp:205
SourceLocation getBeginLoc() const LLVM_READONLY
Definition Stmt.cpp:346
void startDefinition()
Starts the definition of this tag declaration.
Definition Decl.cpp:4847
bool isUnion() const
Definition Decl.h:3919
The base class of the type hierarchy.
Definition TypeBase.h:1833
bool isVoidType() const
Definition TypeBase.h:8878
const Type * getPointeeOrArrayElementType() const
If this is a pointer type, return the pointee type.
Definition TypeBase.h:9058
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char,...
Definition Type.cpp:2205
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition Type.h:26
RecordDecl * getAsRecordDecl() const
Retrieves the RecordDecl this type refers to.
Definition Type.h:41
bool isArrayType() const
Definition TypeBase.h:8621
bool isPointerType() const
Definition TypeBase.h:8522
CanQualType getCanonicalTypeUnqualified() const
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition TypeBase.h:8922
const T * castAs() const
Member-template castAs<specific type>.
Definition TypeBase.h:9165
bool isReferenceType() const
Definition TypeBase.h:8546
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:752
bool isLValueReferenceType() const
Definition TypeBase.h:8550
bool isAggregateType() const
Determines whether the type is a C++ aggregate type or C aggregate or union type.
Definition Type.cpp:2411
RecordDecl * castAsRecordDecl() const
Definition Type.h:48
QualType getCanonicalTypeInternal() const
Definition TypeBase.h:3119
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition TypeBase.h:9051
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition TypeBase.h:2800
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type.
Definition TypeBase.h:9151
bool isFloatingType() const
Definition Type.cpp:2304
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition Type.cpp:2253
bool isAnyPointerType() const
Definition TypeBase.h:8530
const T * getAs() const
Member-template getAs<specific type>'.
Definition TypeBase.h:9098
bool isRecordType() const
Definition TypeBase.h:8649
bool isUnionType() const
Definition Type.cpp:718
Represent the declaration of a variable (in which case it is an lvalue), a function (in which case it ...
Definition Decl.h:711
QualType getType() const
Definition Decl.h:722
Represents a variable declaration or definition.
Definition Decl.h:925
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition Decl.cpp:2257
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition Decl.cpp:2366
const Expr * getInit() const
Definition Decl.h:1367
bool hasExternalStorage() const
Returns true if a variable has extern or private_extern storage.
Definition Decl.h:1216
bool hasLocalStorage() const
Returns true if a variable with function scope is a non-static local variable.
Definition Decl.h:1183
@ DeclarationOnly
This declaration is only a declaration.
Definition Decl.h:1294
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition Decl.cpp:2375
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition Decl.h:1261
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to.
Definition Decl.h:1357
Represents a C array with a specified size that is not an integer-constant-expression.
Definition TypeBase.h:3964
Expr * getSizeExpr() const
Definition TypeBase.h:3978
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition SPIR.cpp:35
bool isEmptyRecordForLayout(const ASTContext &Context, QualType T)
isEmptyRecordForLayout - Return true iff a structure contains only empty base classes (per isEmptyRec...
@ Type
The l-value was considered opaque, so the alignment was determined from a type.
Definition CGValue.h:154
@ Decl
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
Definition CGValue.h:145
bool isEmptyFieldForLayout(const ASTContext &Context, const FieldDecl *FD)
isEmptyFieldForLayout - Return true iff the field is "empty", that is, either a zero-width bit-field ...
The JSON file list parser is used to communicate input to InstallAPI.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
CanQual< Type > CanQualType
Represents a canonical, potentially-qualified type.
bool needsTaskBasedThreadLimit(OpenMPDirectiveKind DKind)
Checks if the specified target directive, combined or not, needs task based thread_limit.
@ Ctor_Complete
Complete object ctor.
Definition ABI.h:25
bool isa(CodeGen::Address addr)
Definition Address.h:330
if (T->getSizeExpr()) TRY_TO(TraverseStmt(const_cast<Expr *>(T->getSizeExpr())))
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
@ Conditional
A conditional (?:) operator.
Definition Sema.h:667
@ ICIS_NoInit
No in-class initializer.
Definition Specifiers.h:272
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
@ LCK_ByRef
Capturing by reference.
Definition Lambda.h:37
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
@ Private
'private' clause, allowed on 'parallel', 'serial', 'loop', 'parallel loop', and 'serial loop' constru...
@ Vector
'vector' clause, allowed on 'loop', Combined, and 'routine' directives.
@ Reduction
'reduction' clause, allowed on Parallel, Serial, Loop, and the combined constructs.
@ Present
'present' clause, allowed on Compute and Combined constructs, plus 'data' and 'declare'.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition OpenMPKinds.h:39
@ OMPC_SCHEDULE_MODIFIER_last
Definition OpenMPKinds.h:44
@ OMPC_SCHEDULE_MODIFIER_unknown
Definition OpenMPKinds.h:40
@ AS_public
Definition Specifiers.h:124
This class represents a compute construct, representing a 'Kind' of 'parallel', 'serial',...
@ CR_OpenMP
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Expr * Cond
static bool classof(const Stmt *T)
bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind)
Checks if the specified directive kind is one of tasking directives - task, taskloop,...
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
@ Result
The result type of a method or function.
Definition TypeBase.h:905
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
const FunctionProtoType * T
OpenMPDependClauseKind
OpenMP attributes for 'depend' clause.
Definition OpenMPKinds.h:55
@ OMPC_DEPEND_unknown
Definition OpenMPKinds.h:59
@ Dtor_Complete
Complete object dtor.
Definition ABI.h:36
@ Union
The "union" keyword.
Definition TypeBase.h:5896
@ Type
The name was classified as a type.
Definition Sema.h:562
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
OpenMPSeverityClauseKind
OpenMP attributes for 'severity' clause.
@ OMPC_SEVERITY_unknown
LangAS
Defines the address space values used by the address space qualifier of QualType.
llvm::omp::Directive OpenMPDirectiveKind
OpenMP directives.
Definition OpenMPKinds.h:25
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
@ VK_PRValue
A pr-value expression (in the C++11 taxonomy) produces a temporary value.
Definition Specifiers.h:135
@ VK_LValue
An l-value expression is a reference to an object with independent storage.
Definition Specifiers.h:139
for (const auto &A : T->param_types())
void getOpenMPCaptureRegions(llvm::SmallVectorImpl< OpenMPDirectiveKind > &CaptureRegions, OpenMPDirectiveKind DKind)
Return the captured regions of an OpenMP directive.
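A combined directive is lowered as a stack of nested capture regions, and getOpenMPCaptureRegions reports that stack outermost-first. A hedged sketch (the function is the real clang API; the expected contents in the comment are illustrative of the usual lowering):

llvm::SmallVector<clang::OpenMPDirectiveKind, 4> CaptureRegions;
clang::getOpenMPCaptureRegions(
    CaptureRegions, llvm::omp::OMPD_target_teams_distribute_parallel_for);
// Roughly: OMPD_task (the implicit target task), OMPD_target,
// OMPD_teams, OMPD_parallel - one outlined region per entry.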
OpenMPNumThreadsClauseModifier
@ OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown
U cast(CodeGen::Address addr)
Definition Address.h:327
@ OMPC_DEVICE_unknown
Definition OpenMPKinds.h:51
OpenMPMapModifierKind
OpenMP modifier kind for 'map' clause.
Definition OpenMPKinds.h:79
@ OMPC_MAP_MODIFIER_unknown
Definition OpenMPKinds.h:80
@ Other
Other implicit parameter.
Definition Decl.h:1745
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition OpenMPKinds.h:31
@ OMPC_SCHEDULE_unknown
Definition OpenMPKinds.h:35
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
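Rather than enumerating every combined form, codegen branches on the directive families these predicates define. A minimal sketch (the two helpers are hypothetical; the predicates are the real clang functions indexed above):

// 'parallel', 'parallel for', 'target parallel', ... all share the
// outlined-parallel-function path.
static bool needsOutlinedParallelFn(clang::OpenMPDirectiveKind DKind) {
  return clang::isOpenMPParallelDirective(DKind);
}

// Any directive that hands loop iterations to the runtime scheduler.
static bool usesLoopScheduling(clang::OpenMPDirectiveKind DKind) {
  return clang::isOpenMPWorksharingDirective(DKind) ||
         clang::isOpenMPDistributeDirective(DKind) ||
         clang::isOpenMPTaskLoopDirective(DKind);
}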
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition OpenMPKinds.h:71
@ OMPC_MAP_unknown
Definition OpenMPKinds.h:75
unsigned long uint64_t
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
#define false
Definition stdbool.h:26
Struct with the values to be passed to the dispatch runtime function.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified)
Maps the expression for the lastprivate variable to the global copy used to store new value because o...
Struct with the values to be passed to the static runtime function.
bool IVSigned
Sign of the iteration variable.
Address UB
Address of the output variable in which the upper iteration number is returned.
Address IL
Address of the output variable in which the flag of the last iteration is returned.
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
unsigned IVSize
Size of the iteration variable in bits.
Address ST
Address of the output variable in which the stride value is returned, necessary to generate the static_chunked scheduled loop.
bool Ordered
true if loop is ordered, false otherwise.
Address LB
Address of the output variable in which the lower iteration number is returned.
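The fields above are bundled into CGOpenMPRuntime::StaticRTInput before the call to the __kmpc_for_static_init family; IL, LB, UB, and ST are output slots the runtime writes back. A hedged sketch of constructing one (the argument order follows the listing above; the *Addr values are assumed to be already-emitted allocas):

// Sketch, not the exact call site: describe a signed 32-bit IV,
// unordered loop with no chunk_size clause.
CGOpenMPRuntime::StaticRTInput StaticInit(
    /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false,
    ILAddr, LBAddr, UBAddr, STAddr, /*Chunk=*/nullptr);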
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
llvm::CallingConv::ID getRuntimeCC() const
SmallVector< const Expr *, 4 > DepExprs
EvalResult is a struct with detailed info about an evaluated expression.
Definition Expr.h:645
Extra information about a function prototype.
Definition TypeBase.h:5349
Expr * CounterUpdate
Updater for the internal counter: ++CounterVD;
Definition ExprOpenMP.h:121
Data for list of allocators.
Expr * AllocatorTraits
Allocator traits.
Scheduling data for loop-based OpenMP directives.
OpenMPScheduleClauseModifier M2
OpenMPScheduleClauseModifier M1
OpenMPScheduleClauseKind Schedule
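This triple is how a 'schedule' clause reaches loop codegen; the chunk expression travels separately. A minimal sketch of what, e.g., schedule(monotonic: dynamic, 4) lowers to (the enumerators are the real OpenMPKinds.h values):

OpenMPScheduleTy S;
S.Schedule = OMPC_SCHEDULE_dynamic;
S.M1 = OMPC_SCHEDULE_MODIFIER_monotonic;
S.M2 = OMPC_SCHEDULE_MODIFIER_unknown; // only one modifier present
// The chunk expression '4' is carried alongside, not inside this struct.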
Describes how types, statements, expressions, and declarations should be printed.