clang  15.0.0git
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
51 /// Base class for handling code generation inside OpenMP regions.
52 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
53 public:
54  /// Kinds of OpenMP regions used in codegen.
55  enum CGOpenMPRegionKind {
56  /// Region with outlined function for standalone 'parallel'
57  /// directive.
58  ParallelOutlinedRegion,
59  /// Region with outlined function for standalone 'task' directive.
60  TaskOutlinedRegion,
61  /// Region for constructs that do not require function outlining,
62  /// like 'for', 'sections', 'atomic' etc. directives.
63  InlinedRegion,
64  /// Region with outlined function for standalone 'target' directive.
65  TargetRegion,
66  };
67 
68  CGOpenMPRegionInfo(const CapturedStmt &CS,
69  const CGOpenMPRegionKind RegionKind,
71  bool HasCancel)
72  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
73  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
74 
75  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
77  bool HasCancel)
78  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
79  Kind(Kind), HasCancel(HasCancel) {}
80 
81  /// Get a variable or parameter for storing global thread id
82  /// inside OpenMP construct.
83  virtual const VarDecl *getThreadIDVariable() const = 0;
84 
85  /// Emit the captured statement body.
86  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
87 
88  /// Get an LValue for the current ThreadID variable.
89  /// \return LValue for thread id variable. This LValue always has type int32*.
90  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
91 
92  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
93 
94  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
95 
96  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
97 
98  bool hasCancel() const { return HasCancel; }
99 
100  static bool classof(const CGCapturedStmtInfo *Info) {
101  return Info->getKind() == CR_OpenMP;
102  }
103 
104  ~CGOpenMPRegionInfo() override = default;
105 
106 protected:
107  CGOpenMPRegionKind RegionKind;
108  RegionCodeGenTy CodeGen;
110  bool HasCancel;
111 };
112 
113 /// API for captured statement code generation in OpenMP constructs.
114 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
115 public:
116  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
117  const RegionCodeGenTy &CodeGen,
118  OpenMPDirectiveKind Kind, bool HasCancel,
119  StringRef HelperName)
120  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
121  HasCancel),
122  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
123  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
124  }
125 
126  /// Get a variable or parameter for storing global thread id
127  /// inside OpenMP construct.
128  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
129 
130  /// Get the name of the capture helper.
131  StringRef getHelperName() const override { return HelperName; }
132 
133  static bool classof(const CGCapturedStmtInfo *Info) {
134  return CGOpenMPRegionInfo::classof(Info) &&
135  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
136  ParallelOutlinedRegion;
137  }
138 
139 private:
140  /// A variable or parameter storing global thread id for OpenMP
141  /// constructs.
142  const VarDecl *ThreadIDVar;
143  StringRef HelperName;
144 };
145 
146 /// API for captured statement code generation in OpenMP constructs.
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
148 public:
149  class UntiedTaskActionTy final : public PrePostActionTy {
150  bool Untied;
151  const VarDecl *PartIDVar;
152  const RegionCodeGenTy UntiedCodeGen;
153  llvm::SwitchInst *UntiedSwitch = nullptr;
154 
155  public:
156  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
157  const RegionCodeGenTy &UntiedCodeGen)
158  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
159  void Enter(CodeGenFunction &CGF) override {
160  if (Untied) {
161  // Emit task switching point.
162  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
163  CGF.GetAddrOfLocalVar(PartIDVar),
164  PartIDVar->getType()->castAs<PointerType>());
165  llvm::Value *Res =
166  CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
167  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
168  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
169  CGF.EmitBlock(DoneBB);
171  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
173  CGF.Builder.GetInsertBlock());
174  emitUntiedSwitch(CGF);
175  }
176  }
177  void emitUntiedSwitch(CodeGenFunction &CGF) const {
178  if (Untied) {
179  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
180  CGF.GetAddrOfLocalVar(PartIDVar),
181  PartIDVar->getType()->castAs<PointerType>());
182  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183  PartIdLVal);
184  UntiedCodeGen(CGF);
185  CodeGenFunction::JumpDest CurPoint =
186  CGF.getJumpDestInCurrentScope(".untied.next.");
187  CGF.EmitBranch(CGF.ReturnBlock.getBlock());
188  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
189  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
190  CGF.Builder.GetInsertBlock());
191  CGF.EmitBranchThroughCleanup(CurPoint);
192  CGF.EmitBlock(CurPoint.getBlock());
193  }
194  }
195  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
196  };
197  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198  const VarDecl *ThreadIDVar,
199  const RegionCodeGenTy &CodeGen,
200  OpenMPDirectiveKind Kind, bool HasCancel,
201  const UntiedTaskActionTy &Action)
202  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203  ThreadIDVar(ThreadIDVar), Action(Action) {
204  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
205  }
206 
207  /// Get a variable or parameter for storing global thread id
208  /// inside OpenMP construct.
209  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
210 
211  /// Get an LValue for the current ThreadID variable.
212  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
213 
214  /// Get the name of the capture helper.
215  StringRef getHelperName() const override { return ".omp_outlined."; }
216 
217  void emitUntiedSwitch(CodeGenFunction &CGF) override {
218  Action.emitUntiedSwitch(CGF);
219  }
220 
221  static bool classof(const CGCapturedStmtInfo *Info) {
222  return CGOpenMPRegionInfo::classof(Info) &&
223  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224  TaskOutlinedRegion;
225  }
226 
227 private:
228  /// A variable or parameter storing global thread id for OpenMP
229  /// constructs.
230  const VarDecl *ThreadIDVar;
231  /// Action for emitting code for untied tasks.
232  const UntiedTaskActionTy &Action;
233 };
234 
235 /// API for inlined captured statement code generation in OpenMP
236 /// constructs.
237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238 public:
239  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240  const RegionCodeGenTy &CodeGen,
241  OpenMPDirectiveKind Kind, bool HasCancel)
242  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243  OldCSI(OldCSI),
244  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
245 
246  // Retrieve the value of the context parameter.
247  llvm::Value *getContextValue() const override {
248  if (OuterRegionInfo)
249  return OuterRegionInfo->getContextValue();
250  llvm_unreachable("No context value for inlined OpenMP region");
251  }
252 
253  void setContextValue(llvm::Value *V) override {
254  if (OuterRegionInfo) {
255  OuterRegionInfo->setContextValue(V);
256  return;
257  }
258  llvm_unreachable("No context value for inlined OpenMP region");
259  }
260 
261  /// Lookup the captured field decl for a variable.
262  const FieldDecl *lookup(const VarDecl *VD) const override {
263  if (OuterRegionInfo)
264  return OuterRegionInfo->lookup(VD);
265  // If there is no outer outlined region,no need to lookup in a list of
266  // captured variables, we can use the original one.
267  return nullptr;
268  }
269 
270  FieldDecl *getThisFieldDecl() const override {
271  if (OuterRegionInfo)
272  return OuterRegionInfo->getThisFieldDecl();
273  return nullptr;
274  }
275 
276  /// Get a variable or parameter for storing global thread id
277  /// inside OpenMP construct.
278  const VarDecl *getThreadIDVariable() const override {
279  if (OuterRegionInfo)
280  return OuterRegionInfo->getThreadIDVariable();
281  return nullptr;
282  }
283 
284  /// Get an LValue for the current ThreadID variable.
285  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286  if (OuterRegionInfo)
287  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288  llvm_unreachable("No LValue for inlined OpenMP construct");
289  }
290 
291  /// Get the name of the capture helper.
292  StringRef getHelperName() const override {
293  if (auto *OuterRegionInfo = getOldCSI())
294  return OuterRegionInfo->getHelperName();
295  llvm_unreachable("No helper name for inlined OpenMP construct");
296  }
297 
298  void emitUntiedSwitch(CodeGenFunction &CGF) override {
299  if (OuterRegionInfo)
300  OuterRegionInfo->emitUntiedSwitch(CGF);
301  }
302 
303  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
304 
305  static bool classof(const CGCapturedStmtInfo *Info) {
306  return CGOpenMPRegionInfo::classof(Info) &&
307  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
308  }
309 
310  ~CGOpenMPInlinedRegionInfo() override = default;
311 
312 private:
313  /// CodeGen info about outer OpenMP region.
315  CGOpenMPRegionInfo *OuterRegionInfo;
316 };
317 
318 /// API for captured statement code generation in OpenMP target
319 /// constructs. For this captures, implicit parameters are used instead of the
320 /// captured fields. The name of the target region has to be unique in a given
321 /// application so it is provided by the client, because only the client has
322 /// the information to generate that.
323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324 public:
325  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326  const RegionCodeGenTy &CodeGen, StringRef HelperName)
327  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328  /*HasCancel=*/false),
329  HelperName(HelperName) {}
330 
331  /// This is unused for target regions because each starts executing
332  /// with a single thread.
333  const VarDecl *getThreadIDVariable() const override { return nullptr; }
334 
335  /// Get the name of the capture helper.
336  StringRef getHelperName() const override { return HelperName; }
337 
338  static bool classof(const CGCapturedStmtInfo *Info) {
339  return CGOpenMPRegionInfo::classof(Info) &&
340  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
341  }
342 
343 private:
344  StringRef HelperName;
345 };
346 
/// Placeholder region-codegen callback used where a RegionCodeGenTy is
/// required but must never actually be invoked (expression-only captures).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
350 /// API for generation of expressions captured in a innermost OpenMP
351 /// region.
352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353 public:
354  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356  OMPD_unknown,
357  /*HasCancel=*/false),
358  PrivScope(CGF) {
359  // Make sure the globals captured in the provided statement are local by
360  // using the privatization logic. We assume the same variable is not
361  // captured more than once.
362  for (const auto &C : CS.captures()) {
363  if (!C.capturesVariable() && !C.capturesVariableByCopy())
364  continue;
365 
366  const VarDecl *VD = C.getCapturedVar();
367  if (VD->isLocalVarDeclOrParm())
368  continue;
369 
370  DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371  /*RefersToEnclosingVariableOrCapture=*/false,
373  C.getLocation());
374  PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
375  }
376  (void)PrivScope.Privatize();
377  }
378 
379  /// Lookup the captured field decl for a variable.
380  const FieldDecl *lookup(const VarDecl *VD) const override {
381  if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382  return FD;
383  return nullptr;
384  }
385 
386  /// Emit the captured statement body.
387  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388  llvm_unreachable("No body for expressions");
389  }
390 
391  /// Get a variable or parameter for storing global thread id
392  /// inside OpenMP construct.
393  const VarDecl *getThreadIDVariable() const override {
394  llvm_unreachable("No thread id for expressions");
395  }
396 
397  /// Get the name of the capture helper.
398  StringRef getHelperName() const override {
399  llvm_unreachable("No helper name for expressions");
400  }
401 
402  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
403 
404 private:
405  /// Private scope to capture global variables.
407 };
408 
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Stashed copies of CGF state swapped out while the inlined region is
  // emitted (only when NoInheritance is set); restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      // Hide the enclosing lambda/block capture context from the region.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      // Put back the lambda/block capture context saved in the constructor.
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
451 
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive. Intentionally shares the 0x40 value
  /// with OMP_IDENT_BARRIER_IMPL (mirrors the kmp.h encoding).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
480 
481 namespace {
483 /// Values for bit flags for marking which requires clauses have been used.
485  /// flag undefined.
486  OMP_REQ_UNDEFINED = 0x000,
487  /// no requires clause present.
488  OMP_REQ_NONE = 0x001,
489  /// reverse_offload clause.
490  OMP_REQ_REVERSE_OFFLOAD = 0x002,
491  /// unified_address clause.
492  OMP_REQ_UNIFIED_ADDRESS = 0x004,
493  /// unified_shared_memory clause.
494  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
495  /// dynamic_allocators clause.
496  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
497  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
498 };
499 
501  /// Device ID if the device was not defined, runtime should get it
502  /// from environment variables in the spec.
503  OMP_DEVICEID_UNDEF = -1,
504 };
505 } // anonymous namespace
506 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
547 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule when none is specified: static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583  PrePostActionTy *Action;
584 
585 public:
586  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588  if (!CGF.HaveInsertPoint())
589  return;
590  Action->Exit(CGF);
591  }
592 };
593 
594 } // anonymous namespace
595 
598  if (PrePostAction) {
599  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600  Callback(CodeGen, CGF, *PrePostAction);
601  } else {
602  PrePostActionTy Action;
603  Callback(CodeGen, CGF, Action);
604  }
605 }
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
610 getReductionInit(const Expr *ReductionOp) {
611  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613  if (const auto *DRE =
614  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615  if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616  return DRD;
617  return nullptr;
618 }
619 
621  const OMPDeclareReductionDecl *DRD,
622  const Expr *InitOp,
623  Address Private, Address Original,
624  QualType Ty) {
625  if (DRD->getInitializer()) {
626  std::pair<llvm::Function *, llvm::Function *> Reduction =
628  const auto *CE = cast<CallExpr>(InitOp);
629  const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
630  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
631  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
632  const auto *LHSDRE =
633  cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
634  const auto *RHSDRE =
635  cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
636  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
637  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
638  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
639  (void)PrivateScope.Privatize();
640  RValue Func = RValue::get(Reduction.second);
641  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642  CGF.EmitIgnoredExpr(InitOp);
643  } else {
644  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645  std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646  auto *GV = new llvm::GlobalVariable(
647  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648  llvm::GlobalValue::PrivateLinkage, Init, Name);
649  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650  RValue InitRVal;
651  switch (CGF.getEvaluationKind(Ty)) {
652  case TEK_Scalar:
653  InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654  break;
655  case TEK_Complex:
656  InitRVal =
658  break;
659  case TEK_Aggregate: {
660  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663  /*IsInitializer=*/false);
664  return;
665  }
666  }
667  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670  /*IsInitializer=*/false);
671  }
672 }
673 
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via the
/// 'declare reduction' initializer \p Init; otherwise emit \p Init directly.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null; when set, \p SrcAddr is
/// the matching source array walked in lockstep with the destination.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
763 
/// Emits the lvalue for a reduction item's shared expression by delegating to
/// CodeGenFunction::EmitOMPSharedLValue.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
767 
768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769  const Expr *E) {
770  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772  return LValue();
773 }
774 
/// Emits element-wise initialization of an array-typed private reduction
/// copy, using the 'declare reduction' initializer when applicable.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR initializer when a DRD is present and either supplies an
  // initializer or the private variable has none of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
791 
794  ArrayRef<const Expr *> Privates,
795  ArrayRef<const Expr *> ReductionOps) {
796  ClausesData.reserve(Shareds.size());
797  SharedAddresses.reserve(Shareds.size());
798  Sizes.reserve(Shareds.size());
799  BaseDecls.reserve(Shareds.size());
800  const auto *IOrig = Origs.begin();
801  const auto *IPriv = Privates.begin();
802  const auto *IRed = ReductionOps.begin();
803  for (const Expr *Ref : Shareds) {
804  ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805  std::advance(IOrig, 1);
806  std::advance(IPriv, 1);
807  std::advance(IRed, 1);
808  }
809 }
810 
812  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
813  "Number of generated lvalues must be exactly N.");
814  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816  SharedAddresses.emplace_back(First, Second);
817  if (ClausesData[N].Shared == ClausesData[N].Ref) {
818  OrigAddresses.emplace_back(First, Second);
819  } else {
820  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822  OrigAddresses.emplace_back(First, Second);
823  }
824 }
825 
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: the byte size is known from the type; no separate
    // element count is needed (second member stays null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1: array-section bounds are inclusive.
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: byte size comes from the type; derive the element count.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so that
  // emitting the variably-modified type below picks up the right bounds.
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
859 
861  llvm::Value *Size) {
862  QualType PrivateType = getPrivateType(N);
863  if (!PrivateType->isVariablyModifiedType()) {
864  assert(!Size && !Sizes[N].second &&
865  "Size should be nullptr for non-variably modified reduction "
866  "items.");
867  return;
868  }
870  CGF,
871  cast<OpaqueValueExpr>(
872  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
873  RValue::get(Size));
874  CGF.EmitVariablyModifiedType(PrivateType);
875 }
876 
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // A user-defined 'declare reduction' may supply its own initializer.
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Array items are initialized element-wise.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a UDR initializer (or no default initializer).
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private copy's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
901 
903  QualType PrivateType = getPrivateType(N);
904  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
905  return DTorKind != QualType::DK_none;
906 }
907 
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    // Cast to the memory representation of the private type before pushing
    // the destructor cleanup.
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}
918 
                          LValue BaseLV) {
  // Chase pointers/references from BaseTy down to ElTy, loading through each
  // level of indirection, then return an lvalue of element type ElTy.
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
938 
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  // Rebuild the chain of indirections stripped by loadToBegin: create one
  // temporary per pointer/reference level and link them together, storing
  // Addr at the innermost level. Returns the outermost temporary, or the
  // (possibly address-space-cast) original base when no indirection exists.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}
967 
968 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
969  const VarDecl *OrigVD = nullptr;
970  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
971  const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
972  while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
973  Base = TempOASE->getBase()->IgnoreParenImpCasts();
974  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975  Base = TempASE->getBase()->IgnoreParenImpCasts();
976  DE = cast<DeclRefExpr>(Base);
977  OrigVD = cast<VarDecl>(DE->getDecl());
978  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
979  const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
980  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981  Base = TempASE->getBase()->IgnoreParenImpCasts();
982  DE = cast<DeclRefExpr>(Base);
983  OrigVD = cast<VarDecl>(DE->getDecl());
984  }
985  return OrigVD;
986 }
987 
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    // The reduction item is an array section/subscript: compute the offset
    // of the shared element within its base and apply the same offset to the
    // private copy so both refer to corresponding elements.
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
1014 
  // True iff item N's reduction operation is a user-defined reduction that
  // provides its own initializer.
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}
1020 
1021 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1022  return CGF.EmitLoadOfPointerLValue(
1023  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1024  getThreadIDVariable()->getType()->castAs<PointerType>());
1025 }
1026 
1027 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1028  if (!CGF.HaveInsertPoint())
1029  return;
1030  // 1.2.2 OpenMP Language Terminology
1031  // Structured block - An executable statement with a single entry at the
1032  // top and a single exit at the bottom.
1033  // The point of exit cannot be a branch out of the structured block.
1034  // longjmp() and throw() must not violate the entry/exit criteria.
1035  CGF.EHStack.pushTerminate();
1036  if (S)
1037  CGF.incrementProfileCounter(S);
1038  CodeGen(CGF);
1039  CGF.EHStack.popTerminate();
1040 }
1041 
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  // For tasks the thread id is passed by value (kmp_int32), so no pointer
  // load is required — just form an lvalue over the local variable.
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
}
1048 
                                     QualType FieldTy) {
  // Create a public, non-mutable, non-bitfield field of type FieldTy and
  // append it to the given record.
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
1059 
1060 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1061  StringRef Separator)
1062  : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1063  OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1064  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1065 
1066  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1067  OMPBuilder.initialize();
1068  loadOffloadInfoMetadata();
1069 }
1070 
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    // Skip handles whose underlying value has already been deleted.
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only remove unreferenced pure declarations; definitions and used
    // globals must stay.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}
1085 
  // Join the parts with FirstSeparator before the first part and Separator
  // between the remaining parts, e.g. ".part1.part2" or "_part1$part2".
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}
1096 
static llvm::Function *
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // Emits "void .omp_combiner.(Ty *in, Ty *out)" (or ".omp_initializer.")
  // for a user-defined reduction: maps the 'omp_in'/'omp_out' (or
  // 'omp_orig'/'omp_priv') variables onto the two pointer parameters and
  // emits the combiner/initializer expression in that scope.
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force inlining under optimization.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    // 'omp_priv' has its own non-trivial initializer; emit it first.
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1151 
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Emit (once per decl) the combiner and optional initializer functions for
  // a 'declare reduction' construct and cache them in UDRMap.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
        CGM, D->getType(),
            : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Track per-function UDRs so they can be dropped in functionFinished().
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1177 
std::pair<llvm::Function *, llvm::Function *>
  // Return the cached {combiner, initializer} pair, emitting it on demand.
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
1186 
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for the given directive kind on
  // construction; pops it on destruction. No-op when OMPBuilder is null.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1231 
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  // Shared implementation for emitting the outlined function of 'parallel'
  // and 'teams' regions.
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether any of the parallel-containing directive forms carries
  // a 'cancel' region, so cancellation barriers can be wired up correctly.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1268 
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Outline the body of a 'parallel' region.
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1276 
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  // Outline the body of a 'teams' region.
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
1284 
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, each resumption point re-enqueues the task by calling
  // __kmpc_omp_task(loc, tid, task_t).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Determine whether the task(-loop) directive carries a 'cancel' region.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The number of parts is only meaningful for untied tasks, where the body
  // is split at task scheduling points.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1331 
                                             bool AtCurrentPoint) {
  // Install a placeholder instruction (a no-op bitcast of undef) that marks
  // where runtime service calls such as __kmpc_global_thread_num should be
  // inserted for the current function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Default: right after the alloca insertion point, i.e. in the entry
    // block, so the calls dominate all uses.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
1347 
  // Remove the placeholder instruction installed by setLocThreadIdInsertPt,
  // if any, and reset the cached pointer.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}
1356 
                                             SourceLocation Loc,
                                             SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location in the ";file;function;line;column;;" form expected
  // by the OpenMP runtime's ident_t source-location string.
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1369 
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  // Build (or reuse) an ident_t describing Loc with the given flags.
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    // Without debug info (or a valid location) use the shared default string.
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
1393 
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // The parameter is only safe to use when its load is guaranteed to be
      // reachable: either exceptions are off, or the load happens in the
      // entry block / the current block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1462 
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // Drop all per-function caches (thread id, UDRs, mappers, lastprivate
  // conditionals, untied task stacks) for the just-finished function.
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for(const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for(const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
1483 
  // The ident_t * type is owned by the OpenMPIRBuilder.
  return OMPBuilder.IdentPtr;
}
1487 
1488 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1489  if (!Kmpc_MicroTy) {
1490  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1491  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1492  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1493  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1494  }
1495  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1496 }
1497 
1498 llvm::FunctionCallee
1499 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1500  bool IsGPUDistribute) {
1501  assert((IVSize == 32 || IVSize == 64) &&
1502  "IV size is not compatible with the omp runtime");
1503  StringRef Name;
1504  if (IsGPUDistribute)
1505  Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1506  : "__kmpc_distribute_static_init_4u")
1507  : (IVSigned ? "__kmpc_distribute_static_init_8"
1508  : "__kmpc_distribute_static_init_8u");
1509  else
1510  Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1511  : "__kmpc_for_static_init_4u")
1512  : (IVSigned ? "__kmpc_for_static_init_8"
1513  : "__kmpc_for_static_init_8u");
1514 
1515  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1516  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1517  llvm::Type *TypeParams[] = {
1518  getIdentTyPointerTy(), // loc
1519  CGM.Int32Ty, // tid
1520  CGM.Int32Ty, // schedtype
1521  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1522  PtrTy, // p_lower
1523  PtrTy, // p_upper
1524  PtrTy, // p_stride
1525  ITy, // incr
1526  ITy // chunk
1527  };
1528  auto *FnTy =
1529  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1530  return CGM.CreateRuntimeFunction(FnTy, Name);
1531 }
1532 
1533 llvm::FunctionCallee
1534 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1535  assert((IVSize == 32 || IVSize == 64) &&
1536  "IV size is not compatible with the omp runtime");
1537  StringRef Name =
1538  IVSize == 32
1539  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1540  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1541  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1542  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1543  CGM.Int32Ty, // tid
1544  CGM.Int32Ty, // schedtype
1545  ITy, // lower
1546  ITy, // upper
1547  ITy, // stride
1548  ITy // chunk
1549  };
1550  auto *FnTy =
1551  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1552  return CGM.CreateRuntimeFunction(FnTy, Name);
1553 }
1554 
1555 llvm::FunctionCallee
1556 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1557  assert((IVSize == 32 || IVSize == 64) &&
1558  "IV size is not compatible with the omp runtime");
1559  StringRef Name =
1560  IVSize == 32
1561  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1562  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1563  llvm::Type *TypeParams[] = {
1564  getIdentTyPointerTy(), // loc
1565  CGM.Int32Ty, // tid
1566  };
1567  auto *FnTy =
1568  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1569  return CGM.CreateRuntimeFunction(FnTy, Name);
1570 }
1571 
1572 llvm::FunctionCallee
1573 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1574  assert((IVSize == 32 || IVSize == 64) &&
1575  "IV size is not compatible with the omp runtime");
1576  StringRef Name =
1577  IVSize == 32
1578  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1579  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1580  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1582  llvm::Type *TypeParams[] = {
1583  getIdentTyPointerTy(), // loc
1584  CGM.Int32Ty, // tid
1585  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586  PtrTy, // p_lower
1587  PtrTy, // p_upper
1588  PtrTy // p_stride
1589  };
1590  auto *FnTy =
1591  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1592  return CGM.CreateRuntimeFunction(FnTy, Name);
1593 }
1594 
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // The filename from #line directives may not exist on disk; retry with
    // the real spelling location before reporting an error.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
1624 
  // Simd-only mode has no offloading; no reference pointer is needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // 'declare target link' (or 'to' under unified shared memory) variables
    // are accessed through an indirection pointer "<name>_decl_tgt_ref_ptr".
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables need the file ID to disambiguate the
        // pointer name across translation units.
        unsigned DeviceID, FileID, Line;
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
    llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
    if (!Ptr) {
      Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address;
      // on the device the runtime patches it during registration.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1663 
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
  // Lookup the entry, lazily creating it if necessary.
  // The cache global is named "<mangled-var-name>.cache." and passed to
  // __kmpc_threadprivate_cached.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}
1673 
1675  const VarDecl *VD,
1676  Address VDAddr,
1677  SourceLocation Loc) {
1678  if (CGM.getLangOpts().OpenMPUseTLS &&
1680  return VDAddr;
1681 
1682  llvm::Type *VarTy = VDAddr.getElementType();
1683  llvm::Value *Args[] = {
1684  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1685  CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1687  getOrCreateThreadPrivateCache(VD)};
1688  return Address(
1689  CGF.EmitRuntimeCall(
1690  OMPBuilder.getOrCreateRuntimeFunction(
1691  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1692  Args),
1693  CGF.Int8Ty, VDAddr.getAlignment());
1694 }
1695 
1696 void CGOpenMPRuntime::emitThreadPrivateVarInit(
1697  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1698  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1699  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1700  // library.
1701  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1702  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1703  CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1704  OMPLoc);
1705  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1706  // to register constructor/destructor for variable.
1707  llvm::Value *Args[] = {
1708  OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1709  Ctor, CopyCtor, Dtor};
1710  CGF.EmitRuntimeCall(
1711  OMPBuilder.getOrCreateRuntimeFunction(
1712  CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1713  Args);
1714 }
1715 
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Emits (if needed) the ctor/dtor helper functions for a threadprivate
  // variable and registers them with the runtime; returns an initializer
  // function when no CGF is available to emit the registration inline.
  // NOTE(review): the function's first signature line and the second operand
  // of this TLS condition (presumably
  // CGM.getContext().getTargetInfo().isTLSSupported()) are truncated in this
  // listing; confirm against upstream.
  if (CGM.getLangOpts().OpenMPUseTLS &&
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Only emit once per mangled name across the module.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // NOTE(review): the trailing argument of this ImplicitParamDecl
      // (presumably ImplicitParamDecl::Other);) is truncated in this listing.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and initialize through it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the destination pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // NOTE(review): same truncation as the ctor's ImplicitParamDecl above.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No CodeGenFunction: wrap the registration call in its own global
      // init function and hand it back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1836 
                                                 llvm::GlobalVariable *Addr,
                                                 bool PerformInit) {
  // Emits and registers offload ctor/dtor entries for a 'declare target'
  // variable definition; returns whether the variable body itself must be
  // emitted on the device.
  // NOTE(review): the function's first signature line is truncated in this
  // listing; confirm against upstream.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // NOTE(review): the declaration of 'Res' (presumably an
  // llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>) is truncated here.
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Only emit ctor/dtor entries once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  // NOTE(review): the declaration of 'Loc' (used below, presumably
  // VD->getCanonicalDecl()->getBeginLoc()) is truncated in this listing.

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      // NOTE(review): the declaration of 'FI' (presumably
      // CGM.getTypes().arrangeNullaryFunction()) is truncated in this listing.
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // The initializer must be emitted through an address-space-0 view of
      // the global when the target puts globals in another address space.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // On the host only a unique ID placeholder is needed.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      // NOTE(review): the declaration of 'FI' is truncated here as well.
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1967 
                                                  QualType VarType,
                                                  StringRef Name) {
  // Returns the address of a compiler-generated ("artificial") threadprivate
  // variable, creating its backing global/cache on first use.
  // NOTE(review): the function's first signature line is truncated in this
  // listing; confirm against upstream.
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // NOTE(review): the second operand of this condition (presumably
  // CGM.getContext().getTargetInfo().isTLSSupported()) is truncated here.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
    // With TLS support the global itself can be made thread-local.
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise go through __kmpc_threadprivate_cached with a per-name cache.
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      // NOTE(review): the update-location argument line (presumably
      // emitUpdateLocation(CGF, SourceLocation())) is truncated here.
      getThreadID(CGF, SourceLocation()),
      // NOTE(review): the data-pointer argument line (a pointer cast of
      // GAddr) is truncated here.
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      // NOTE(review): a pointer bitcast wrapping this runtime call appears
      // truncated here (note the unbalanced parenthesis two lines below).
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
1999 
                                 const RegionCodeGenTy &ThenGen,
                                 const RegionCodeGenTy &ElseGen) {
  // Emits code for an OpenMP 'if' clause: either constant-folds the
  // condition and emits just one arm, or emits a real conditional branch.
  // NOTE(review): the function's first signature line is truncated in this
  // listing; confirm against upstream.
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): a statement appears truncated here and after ElseGen
  // (presumably (void)ApplyDebugLocation::CreateEmpty(CGF) scopes that
  // suppress the debug location); confirm against upstream.
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
2038 
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  // Emits a parallel region call: __kmpc_fork_call in the parallel case, or
  // a serialized execution of the outlined function when the 'if' clause
  // evaluates to false.
  // NOTE(review): the function's first signature line is truncated in this
  // listing; confirm against upstream.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // NOTE(review): the declaration of 'RealArgs' (presumably
    // llvm::SmallVector<llvm::Value *, 16> RealArgs;) is truncated here.
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2110 
2111 // If we're inside an (outlined) parallel region, use the region info's
2112 // thread-ID variable (it is passed in a first argument of the outlined function
2113 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2114 // regular serial code region, get thread ID by calling kmp_int32
2115 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2116 // return the address of that temp.
2118  SourceLocation Loc) {
2119  if (auto *OMPRegionInfo =
2120  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2121  if (OMPRegionInfo->getThreadIDVariable())
2122  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2123 
2124  llvm::Value *ThreadID = getThreadID(CGF, Loc);
2125  QualType Int32Ty =
2126  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2127  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2128  CGF.EmitStoreOfScalar(ThreadID,
2129  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2130 
2131  return ThreadIDTemp;
2132 }
2133 
2134 llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2135  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2136  SmallString<256> Buffer;
2137  llvm::raw_svector_ostream Out(Buffer);
2138  Out << Name;
2139  StringRef RuntimeName = Out.str();
2140  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2141  if (Elem.second) {
2142  assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2143  "OMP internal variable has different type than requested");
2144  return &*Elem.second;
2145  }
2146 
2147  return Elem.second = new llvm::GlobalVariable(
2148  CGM.getModule(), Ty, /*IsConstant*/ false,
2149  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2150  Elem.first(), /*InsertBefore=*/nullptr,
2151  llvm::GlobalValue::NotThreadLocal, AddressSpace);
2152 }
2153 
2154 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2155  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2156  std::string Name = getName({Prefix, "var"});
2157  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2158 }
2159 
2160 namespace {
2161 /// Common pre(post)-action for different OpenMP constructs.
2162 class CommonActionTy final : public PrePostActionTy {
2163  llvm::FunctionCallee EnterCallee;
2164  ArrayRef<llvm::Value *> EnterArgs;
2165  llvm::FunctionCallee ExitCallee;
2166  ArrayRef<llvm::Value *> ExitArgs;
2167  bool Conditional;
2168  llvm::BasicBlock *ContBlock = nullptr;
2169 
2170 public:
2171  CommonActionTy(llvm::FunctionCallee EnterCallee,
2172  ArrayRef<llvm::Value *> EnterArgs,
2173  llvm::FunctionCallee ExitCallee,
2174  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2175  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2176  ExitArgs(ExitArgs), Conditional(Conditional) {}
2177  void Enter(CodeGenFunction &CGF) override {
2178  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2179  if (Conditional) {
2180  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2181  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2182  ContBlock = CGF.createBasicBlock("omp_if.end");
2183  // Generate the branch (If-stmt)
2184  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2185  CGF.EmitBlock(ThenBlock);
2186  }
2187  }
2188  void Done(CodeGenFunction &CGF) {
2189  // Emit the rest of blocks/branches
2190  CGF.EmitBranch(ContBlock);
2191  CGF.EmitBlock(ContBlock, true);
2192  }
2193  void Exit(CodeGenFunction &CGF) override {
2194  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2195  }
2196 };
2197 } // anonymous namespace
2198 
2200  StringRef CriticalName,
2201  const RegionCodeGenTy &CriticalOpGen,
2202  SourceLocation Loc, const Expr *Hint) {
2203  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2204  // CriticalOpGen();
2205  // __kmpc_end_critical(ident_t *, gtid, Lock);
2206  // Prepare arguments and build a call to __kmpc_critical
2207  if (!CGF.HaveInsertPoint())
2208  return;
2209  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2210  getCriticalRegionLock(CriticalName)};
2211  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2212  std::end(Args));
2213  if (Hint) {
2214  EnterArgs.push_back(CGF.Builder.CreateIntCast(
2215  CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2216  }
2217  CommonActionTy Action(
2218  OMPBuilder.getOrCreateRuntimeFunction(
2219  CGM.getModule(),
2220  Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2221  EnterArgs,
2222  OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2223  OMPRTL___kmpc_end_critical),
2224  Args);
2225  CriticalOpGen.setAction(Action);
2226  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2227 }
2228 
2230  const RegionCodeGenTy &MasterOpGen,
2231  SourceLocation Loc) {
2232  if (!CGF.HaveInsertPoint())
2233  return;
2234  // if(__kmpc_master(ident_t *, gtid)) {
2235  // MasterOpGen();
2236  // __kmpc_end_master(ident_t *, gtid);
2237  // }
2238  // Prepare arguments and build a call to __kmpc_master
2239  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2240  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2241  CGM.getModule(), OMPRTL___kmpc_master),
2242  Args,
2243  OMPBuilder.getOrCreateRuntimeFunction(
2244  CGM.getModule(), OMPRTL___kmpc_end_master),
2245  Args,
2246  /*Conditional=*/true);
2247  MasterOpGen.setAction(Action);
2248  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2249  Action.Done(CGF);
2250 }
2251 
2253  const RegionCodeGenTy &MaskedOpGen,
2254  SourceLocation Loc, const Expr *Filter) {
2255  if (!CGF.HaveInsertPoint())
2256  return;
2257  // if(__kmpc_masked(ident_t *, gtid, filter)) {
2258  // MaskedOpGen();
2259  // __kmpc_end_masked(iden_t *, gtid);
2260  // }
2261  // Prepare arguments and build a call to __kmpc_masked
2262  llvm::Value *FilterVal = Filter
2263  ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2264  : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2265  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2266  FilterVal};
2267  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2268  getThreadID(CGF, Loc)};
2269  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2270  CGM.getModule(), OMPRTL___kmpc_masked),
2271  Args,
2272  OMPBuilder.getOrCreateRuntimeFunction(
2273  CGM.getModule(), OMPRTL___kmpc_end_masked),
2274  ArgsEnd,
2275  /*Conditional=*/true);
2276  MaskedOpGen.setAction(Action);
2277  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2278  Action.Done(CGF);
2279 }
2280 
2282  SourceLocation Loc) {
2283  if (!CGF.HaveInsertPoint())
2284  return;
2285  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2286  OMPBuilder.createTaskyield(CGF.Builder);
2287  } else {
2288  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2289  llvm::Value *Args[] = {
2290  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2291  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2292  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2293  CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2294  Args);
2295  }
2296 
2297  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2298  Region->emitUntiedSwitch(CGF);
2299 }
2300 
2302  const RegionCodeGenTy &TaskgroupOpGen,
2303  SourceLocation Loc) {
2304  if (!CGF.HaveInsertPoint())
2305  return;
2306  // __kmpc_taskgroup(ident_t *, gtid);
2307  // TaskgroupOpGen();
2308  // __kmpc_end_taskgroup(ident_t *, gtid);
2309  // Prepare arguments and build a call to __kmpc_taskgroup
2310  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2311  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2312  CGM.getModule(), OMPRTL___kmpc_taskgroup),
2313  Args,
2314  OMPBuilder.getOrCreateRuntimeFunction(
2315  CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2316  Args);
2317  TaskgroupOpGen.setAction(Action);
2318  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2319 }
2320 
2321 /// Given an array of pointers to variables, project the address of a
2322 /// given variable.
2324  unsigned Index, const VarDecl *Var) {
2325  // Pull out the pointer to the variable.
2326  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2327  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2328 
2329  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2330  return Address(
2331  CGF.Builder.CreateBitCast(
2332  Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2333  ElemTy, CGF.getContext().getDeclAlign(Var));
2334 }
2335 
// Builds the helper function the runtime calls from __kmpc_copyprivate to
// copy each copyprivate variable from the single-executing thread to the
// other threads' private copies.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  // NOTE(review): the trailing argument of both ImplicitParamDecls
  // (presumably ImplicitParamDecl::Other);) is truncated in this listing.
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  // NOTE(review): the linkage/name arguments of Function::Create are
  // truncated in this listing.
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  // NOTE(review): the declarations of the 'LHS' and 'RHS' Addresses
  // (presumably pointer casts of the loaded arguments) are truncated here —
  // note the unbalanced parentheses below.
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsElemType->getPointerTo()),
      ArgsElemType, CGF.getPointerAlign());
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsElemType->getPointerTo()),
      ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Each element is copied with the AST-provided assignment op so that
    // user-defined copy assignment is honored.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2391 
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  // Emits a 'single' region guarded by __kmpc_single/__kmpc_end_single and,
  // when copyprivate variables are present, broadcasts them with
  // __kmpc_copyprivate.
  // NOTE(review): the function's first signature line is truncated in this
  // listing; confirm against upstream.
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      // NOTE(review): a pointer-cast wrapper around the stored value
      // (presumably CreatePointerBitCastOrAddrSpaceCast) is truncated here —
      // note the unbalanced parentheses.
      CGF.Builder.CreateStore(
          CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
          CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    // NOTE(review): the declaration of 'CL' (a cast of CopyprivateList) is
    // truncated here.
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2478 
2480  const RegionCodeGenTy &OrderedOpGen,
2481  SourceLocation Loc, bool IsThreads) {
2482  if (!CGF.HaveInsertPoint())
2483  return;
2484  // __kmpc_ordered(ident_t *, gtid);
2485  // OrderedOpGen();
2486  // __kmpc_end_ordered(ident_t *, gtid);
2487  // Prepare arguments and build a call to __kmpc_ordered
2488  if (IsThreads) {
2489  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2490  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2491  CGM.getModule(), OMPRTL___kmpc_ordered),
2492  Args,
2493  OMPBuilder.getOrCreateRuntimeFunction(
2494  CGM.getModule(), OMPRTL___kmpc_end_ordered),
2495  Args);
2496  OrderedOpGen.setAction(Action);
2497  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2498  return;
2499  }
2500  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2501 }
2502 
2504  unsigned Flags;
2505  if (Kind == OMPD_for)
2506  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2507  else if (Kind == OMPD_sections)
2508  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2509  else if (Kind == OMPD_single)
2510  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2511  else if (Kind == OMPD_barrier)
2512  Flags = OMP_IDENT_BARRIER_EXPL;
2513  else
2514  Flags = OMP_IDENT_BARRIER_IMPL;
2515  return Flags;
2516 }
2517 
2519  CodeGenFunction &CGF, const OMPLoopDirective &S,
2520  OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2521  // Check if the loop directive is actually a doacross loop directive. In this
2522  // case choose static, 1 schedule.
2523  if (llvm::any_of(
2524  S.getClausesOfKind<OMPOrderedClause>(),
2525  [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2526  ScheduleKind = OMPC_SCHEDULE_static;
2527  // Chunk size is 1 in this case.
2528  llvm::APInt ChunkSize(32, 1);
2529  ChunkExpr = IntegerLiteral::Create(
2530  CGF.getContext(), ChunkSize,
2531  CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2532  SourceLocation());
2533  }
2534 }
2535 
2537  OpenMPDirectiveKind Kind, bool EmitChecks,
2538  bool ForceSimpleCall) {
2539  // Check if we should use the OMPBuilder
2540  auto *OMPRegionInfo =
2541  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2542  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2543  CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2544  CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2545  return;
2546  }
2547 
2548  if (!CGF.HaveInsertPoint())
2549  return;
2550  // Build call __kmpc_cancel_barrier(loc, thread_id);
2551  // Build call __kmpc_barrier(loc, thread_id);
2552  unsigned Flags = getDefaultFlagsForBarriers(Kind);
2553  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2554  // thread_id);
2555  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2556  getThreadID(CGF, Loc)};
2557  if (OMPRegionInfo) {
2558  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2559  llvm::Value *Result = CGF.EmitRuntimeCall(
2560  OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2561  OMPRTL___kmpc_cancel_barrier),
2562  Args);
2563  if (EmitChecks) {
2564  // if (__kmpc_cancel_barrier()) {
2565  // exit from construct;
2566  // }
2567  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2568  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2569  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2570  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2571  CGF.EmitBlock(ExitBB);
2572  // exit from construct;
2573  CodeGenFunction::JumpDest CancelDestination =
2574  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2575  CGF.EmitBranchThroughCleanup(CancelDestination);
2576  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2577  }
2578  return;
2579  }
2580  }
2581  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2582  CGM.getModule(), OMPRTL___kmpc_barrier),
2583  Args);
2584 }
2585 
2586 /// Map the OpenMP loop schedule to the runtime enumeration.
2587 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2588  bool Chunked, bool Ordered) {
2589  switch (ScheduleKind) {
2590  case OMPC_SCHEDULE_static:
2591  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2592  : (Ordered ? OMP_ord_static : OMP_sch_static);
2593  case OMPC_SCHEDULE_dynamic:
2594  return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2595  case OMPC_SCHEDULE_guided:
2596  return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2597  case OMPC_SCHEDULE_runtime:
2598  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2599  case OMPC_SCHEDULE_auto:
2600  return Ordered ? OMP_ord_auto : OMP_sch_auto;
2601  case OMPC_SCHEDULE_unknown:
2602  assert(!Chunked && "chunk was specified but schedule kind not known");
2603  return Ordered ? OMP_ord_static : OMP_sch_static;
2604  }
2605  llvm_unreachable("Unexpected runtime schedule");
2606 }
2607 
2608 /// Map the OpenMP distribute schedule to the runtime enumeration.
2609 static OpenMPSchedType
2611  // only static is allowed for dist_schedule
2612  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2613 }
2614 
2616  bool Chunked) const {
2617  OpenMPSchedType Schedule =
2618  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2619  return Schedule == OMP_sch_static;
2620 }
2621 
2623  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2624  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2625  return Schedule == OMP_dist_sch_static;
2626 }
2627 
2629  bool Chunked) const {
2630  OpenMPSchedType Schedule =
2631  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2632  return Schedule == OMP_sch_static_chunked;
2633 }
2634 
2636  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2637  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2638  return Schedule == OMP_dist_sch_static_chunked;
2639 }
2640 
2642  OpenMPSchedType Schedule =
2643  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2644  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2645  return Schedule != OMP_sch_static;
2646 }
2647 
2648 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2651  int Modifier = 0;
2652  switch (M1) {
2653  case OMPC_SCHEDULE_MODIFIER_monotonic:
2654  Modifier = OMP_sch_modifier_monotonic;
2655  break;
2656  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2657  Modifier = OMP_sch_modifier_nonmonotonic;
2658  break;
2659  case OMPC_SCHEDULE_MODIFIER_simd:
2660  if (Schedule == OMP_sch_static_chunked)
2661  Schedule = OMP_sch_static_balanced_chunked;
2662  break;
2665  break;
2666  }
2667  switch (M2) {
2668  case OMPC_SCHEDULE_MODIFIER_monotonic:
2669  Modifier = OMP_sch_modifier_monotonic;
2670  break;
2671  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2672  Modifier = OMP_sch_modifier_nonmonotonic;
2673  break;
2674  case OMPC_SCHEDULE_MODIFIER_simd:
2675  if (Schedule == OMP_sch_static_chunked)
2676  Schedule = OMP_sch_static_balanced_chunked;
2677  break;
2680  break;
2681  }
2682  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2683  // If the static schedule kind is specified or if the ordered clause is
2684  // specified, and if the nonmonotonic modifier is not specified, the effect is
2685  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2686  // modifier is specified, the effect is as if the nonmonotonic modifier is
2687  // specified.
2688  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2689  if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2690  Schedule == OMP_sch_static_balanced_chunked ||
2691  Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2692  Schedule == OMP_dist_sch_static_chunked ||
2693  Schedule == OMP_dist_sch_static))
2694  Modifier = OMP_sch_modifier_nonmonotonic;
2695  }
2696  return Schedule | Modifier;
2697 }
2698 
2700  CodeGenFunction &CGF, SourceLocation Loc,
2701  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2702  bool Ordered, const DispatchRTInput &DispatchValues) {
2703  if (!CGF.HaveInsertPoint())
2704  return;
2705  OpenMPSchedType Schedule = getRuntimeSchedule(
2706  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2707  assert(Ordered ||
2708  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2709  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2710  Schedule != OMP_sch_static_balanced_chunked));
2711  // Call __kmpc_dispatch_init(
2712  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2713  // kmp_int[32|64] lower, kmp_int[32|64] upper,
2714  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2715 
2716  // If the Chunk was not specified in the clause - use default value 1.
2717  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2718  : CGF.Builder.getIntN(IVSize, 1);
2719  llvm::Value *Args[] = {
2720  emitUpdateLocation(CGF, Loc),
2721  getThreadID(CGF, Loc),
2722  CGF.Builder.getInt32(addMonoNonMonoModifier(
2723  CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2724  DispatchValues.LB, // Lower
2725  DispatchValues.UB, // Upper
2726  CGF.Builder.getIntN(IVSize, 1), // Stride
2727  Chunk // Chunk
2728  };
2729  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2730 }
2731 
2733  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2734  llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2736  const CGOpenMPRuntime::StaticRTInput &Values) {
2737  if (!CGF.HaveInsertPoint())
2738  return;
2739 
2740  assert(!Values.Ordered);
2741  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2742  Schedule == OMP_sch_static_balanced_chunked ||
2743  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2744  Schedule == OMP_dist_sch_static ||
2745  Schedule == OMP_dist_sch_static_chunked);
2746 
2747  // Call __kmpc_for_static_init(
2748  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2749  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2750  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2751  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2752  llvm::Value *Chunk = Values.Chunk;
2753  if (Chunk == nullptr) {
2754  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2755  Schedule == OMP_dist_sch_static) &&
2756  "expected static non-chunked schedule");
2757  // If the Chunk was not specified in the clause - use default value 1.
2758  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2759  } else {
2760  assert((Schedule == OMP_sch_static_chunked ||
2761  Schedule == OMP_sch_static_balanced_chunked ||
2762  Schedule == OMP_ord_static_chunked ||
2763  Schedule == OMP_dist_sch_static_chunked) &&
2764  "expected static chunked schedule");
2765  }
2766  llvm::Value *Args[] = {
2767  UpdateLocation,
2768  ThreadId,
2769  CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2770  M2)), // Schedule type
2771  Values.IL.getPointer(), // &isLastIter
2772  Values.LB.getPointer(), // &LB
2773  Values.UB.getPointer(), // &UB
2774  Values.ST.getPointer(), // &Stride
2775  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2776  Chunk // Chunk
2777  };
2778  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2779 }
2780 
2782  SourceLocation Loc,
2783  OpenMPDirectiveKind DKind,
2784  const OpenMPScheduleTy &ScheduleKind,
2785  const StaticRTInput &Values) {
2786  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2787  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2788  assert(isOpenMPWorksharingDirective(DKind) &&
2789  "Expected loop-based or sections-based directive.");
2790  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2791  isOpenMPLoopDirective(DKind)
2792  ? OMP_IDENT_WORK_LOOP
2793  : OMP_IDENT_WORK_SECTIONS);
2794  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2795  llvm::FunctionCallee StaticInitFunction =
2796  createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2797  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2798  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2799  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2800 }
2801 
2803  CodeGenFunction &CGF, SourceLocation Loc,
2804  OpenMPDistScheduleClauseKind SchedKind,
2805  const CGOpenMPRuntime::StaticRTInput &Values) {
2806  OpenMPSchedType ScheduleNum =
2807  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2808  llvm::Value *UpdatedLocation =
2809  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2810  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2811  llvm::FunctionCallee StaticInitFunction;
2812  bool isGPUDistribute =
2813  CGM.getLangOpts().OpenMPIsDevice &&
2814  (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2815  StaticInitFunction = createForStaticInitFunction(
2816  Values.IVSize, Values.IVSigned, isGPUDistribute);
2817 
2818  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2819  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2821 }
2822 
2824  SourceLocation Loc,
2825  OpenMPDirectiveKind DKind) {
2826  if (!CGF.HaveInsertPoint())
2827  return;
2828  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2829  llvm::Value *Args[] = {
2830  emitUpdateLocation(CGF, Loc,
2832  ? OMP_IDENT_WORK_DISTRIBUTE
2833  : isOpenMPLoopDirective(DKind)
2834  ? OMP_IDENT_WORK_LOOP
2835  : OMP_IDENT_WORK_SECTIONS),
2836  getThreadID(CGF, Loc)};
2837  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2838  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2839  (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2840  CGF.EmitRuntimeCall(
2841  OMPBuilder.getOrCreateRuntimeFunction(
2842  CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2843  Args);
2844  else
2845  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2846  CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2847  Args);
2848 }
2849 
2851  SourceLocation Loc,
2852  unsigned IVSize,
2853  bool IVSigned) {
2854  if (!CGF.HaveInsertPoint())
2855  return;
2856  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2857  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2858  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2859 }
2860 
2862  SourceLocation Loc, unsigned IVSize,
2863  bool IVSigned, Address IL,
2864  Address LB, Address UB,
2865  Address ST) {
2866  // Call __kmpc_dispatch_next(
2867  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2868  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2869  // kmp_int[32|64] *p_stride);
2870  llvm::Value *Args[] = {
2871  emitUpdateLocation(CGF, Loc),
2872  getThreadID(CGF, Loc),
2873  IL.getPointer(), // &isLastIter
2874  LB.getPointer(), // &Lower
2875  UB.getPointer(), // &Upper
2876  ST.getPointer() // &Stride
2877  };
2878  llvm::Value *Call =
2879  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2880  return CGF.EmitScalarConversion(
2881  Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2882  CGF.getContext().BoolTy, Loc);
2883 }
2884 
2886  llvm::Value *NumThreads,
2887  SourceLocation Loc) {
2888  if (!CGF.HaveInsertPoint())
2889  return;
2890  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2891  llvm::Value *Args[] = {
2892  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2893  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2894  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2895  CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2896  Args);
2897 }
2898 
2900  ProcBindKind ProcBind,
2901  SourceLocation Loc) {
2902  if (!CGF.HaveInsertPoint())
2903  return;
2904  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2905  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2906  llvm::Value *Args[] = {
2907  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2908  llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2909  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2910  CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2911  Args);
2912 }
2913 
2915  SourceLocation Loc, llvm::AtomicOrdering AO) {
2916  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2917  OMPBuilder.createFlush(CGF.Builder);
2918  } else {
2919  if (!CGF.HaveInsertPoint())
2920  return;
2921  // Build call void __kmpc_flush(ident_t *loc)
2922  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2923  CGM.getModule(), OMPRTL___kmpc_flush),
2924  emitUpdateLocation(CGF, Loc));
2925  }
2926 }
2927 
namespace {
/// Indexes of the fields of the runtime's kmp_task_t record, in the order
/// in which codegen lays them out.
enum KmpTaskTFields {
  /// Pointer to the block of shared variables.
  KmpTaskTShareds,
  /// Pointer to the task entry routine.
  KmpTaskTRoutine,
  /// Partition id, used for untied tasks.
  KmpTaskTPartId,
  /// Function that runs the destructors of the private variables.
  Data1,
  /// Priority of the task.
  Data2,
  /// Lower bound (taskloop directives only).
  KmpTaskTLowerBound,
  /// Upper bound (taskloop directives only).
  KmpTaskTUpperBound,
  /// Stride (taskloop directives only).
  KmpTaskTStride,
  /// "Is last iteration" flag (taskloop directives only).
  KmpTaskTLastIter,
  /// Reduction data (taskloop directives only).
  KmpTaskTReductions,
};
} // anonymous namespace
2953 
2954 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2955  return OffloadEntriesTargetRegion.empty() &&
2956  OffloadEntriesDeviceGlobalVar.empty();
2957 }
2958 
2959 /// Initialize target region entry.
2960 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2961  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2962  StringRef ParentName, unsigned LineNum,
2963  unsigned Order) {
2964  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
2965  "only required for the device "
2966  "code generation.");
2967  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2968  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2969  OMPTargetRegionEntryTargetRegion);
2970  ++OffloadingEntriesNum;
2971 }
2972 
2973 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2974  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2975  StringRef ParentName, unsigned LineNum,
2976  llvm::Constant *Addr, llvm::Constant *ID,
2977  OMPTargetRegionEntryKind Flags) {
2978  // If we are emitting code for a target, the entry is already initialized,
2979  // only has to be registered.
2980  if (CGM.getLangOpts().OpenMPIsDevice) {
2981  // This could happen if the device compilation is invoked standalone.
2982  if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2983  return;
2984  auto &Entry =
2985  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2986  Entry.setAddress(Addr);
2987  Entry.setID(ID);
2988  Entry.setFlags(Flags);
2989  } else {
2990  if (Flags ==
2991  OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2992  hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2993  /*IgnoreAddressId*/ true))
2994  return;
2995  assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
2996  "Target region entry already registered!");
2997  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2998  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2999  ++OffloadingEntriesNum;
3000  }
3001 }
3002 
3003 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3004  unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3005  bool IgnoreAddressId) const {
3006  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3007  if (PerDevice == OffloadEntriesTargetRegion.end())
3008  return false;
3009  auto PerFile = PerDevice->second.find(FileID);
3010  if (PerFile == PerDevice->second.end())
3011  return false;
3012  auto PerParentName = PerFile->second.find(ParentName);
3013  if (PerParentName == PerFile->second.end())
3014  return false;
3015  auto PerLine = PerParentName->second.find(LineNum);
3016  if (PerLine == PerParentName->second.end())
3017  return false;
3018  // Fail if this entry is already registered.
3019  if (!IgnoreAddressId &&
3020  (PerLine->second.getAddress() || PerLine->second.getID()))
3021  return false;
3022  return true;
3023 }
3024 
3025 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3026  const OffloadTargetRegionEntryInfoActTy &Action) {
3027  // Scan all target region entries and perform the provided action.
3028  for (const auto &D : OffloadEntriesTargetRegion)
3029  for (const auto &F : D.second)
3030  for (const auto &P : F.second)
3031  for (const auto &L : P.second)
3032  Action(D.first, F.first, P.first(), L.first, L.second);
3033 }
3034 
3035 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3036  initializeDeviceGlobalVarEntryInfo(StringRef Name,
3037  OMPTargetGlobalVarEntryKind Flags,
3038  unsigned Order) {
3039  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3040  "only required for the device "
3041  "code generation.");
3042  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3043  ++OffloadingEntriesNum;
3044 }
3045 
3046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3047  registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3048  CharUnits VarSize,
3049  OMPTargetGlobalVarEntryKind Flags,
3050  llvm::GlobalValue::LinkageTypes Linkage) {
3051  if (CGM.getLangOpts().OpenMPIsDevice) {
3052  // This could happen if the device compilation is invoked standalone.
3053  if (!hasDeviceGlobalVarEntryInfo(VarName))
3054  return;
3055  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3056  if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3057  if (Entry.getVarSize().isZero()) {
3058  Entry.setVarSize(VarSize);
3059  Entry.setLinkage(Linkage);
3060  }
3061  return;
3062  }
3063  Entry.setVarSize(VarSize);
3064  Entry.setLinkage(Linkage);
3065  Entry.setAddress(Addr);
3066  } else {
3067  if (hasDeviceGlobalVarEntryInfo(VarName)) {
3068  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3069  assert(Entry.isValid() && Entry.getFlags() == Flags &&
3070  "Entry not initialized!");
3071  if (Entry.getVarSize().isZero()) {
3072  Entry.setVarSize(VarSize);
3073  Entry.setLinkage(Linkage);
3074  }
3075  return;
3076  }
3077  OffloadEntriesDeviceGlobalVar.try_emplace(
3078  VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3079  ++OffloadingEntriesNum;
3080  }
3081 }
3082 
3083 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3084  actOnDeviceGlobalVarEntriesInfo(
3085  const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3086  // Scan all target region entries and perform the provided action.
3087  for (const auto &E : OffloadEntriesDeviceGlobalVar)
3088  Action(E.getKey(), E.getValue());
3089 }
3090 
3092  llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3093  llvm::GlobalValue::LinkageTypes Linkage) {
3094  OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
3095 }
3096 
3098  // Emit the offloading entries and metadata so that the device codegen side
3099  // can easily figure out what to emit. The produced metadata looks like
3100  // this:
3101  //
3102  // !omp_offload.info = !{!1, ...}
3103  //
3104  // Right now we only generate metadata for function that contain target
3105  // regions.
3106 
3107  // If we are in simd mode or there are no entries, we don't need to do
3108  // anything.
3109  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3110  return;
3111 
3112  llvm::Module &M = CGM.getModule();
3113  llvm::LLVMContext &C = M.getContext();
3115  SourceLocation, StringRef>,
3116  16>
3117  OrderedEntries(OffloadEntriesInfoManager.size());
3118  llvm::SmallVector<StringRef, 16> ParentFunctions(
3119  OffloadEntriesInfoManager.size());
3120 
3121  // Auxiliary methods to create metadata values and strings.
3122  auto &&GetMDInt = [this](unsigned V) {
3123  return llvm::ConstantAsMetadata::get(
3124  llvm::ConstantInt::get(CGM.Int32Ty, V));
3125  };
3126 
3127  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3128 
3129  // Create the offloading info metadata node.
3130  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3131 
3132  // Create function that emits metadata for each target region entry;
3133  auto &&TargetRegionMetadataEmitter =
3134  [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3135  &GetMDString](
3136  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3137  unsigned Line,
3139  // Generate metadata for target regions. Each entry of this metadata
3140  // contains:
3141  // - Entry 0 -> Kind of this type of metadata (0).
3142  // - Entry 1 -> Device ID of the file where the entry was identified.
3143  // - Entry 2 -> File ID of the file where the entry was identified.
3144  // - Entry 3 -> Mangled name of the function where the entry was
3145  // identified.
3146  // - Entry 4 -> Line in the file where the entry was identified.
3147  // - Entry 5 -> Order the entry was created.
3148  // The first element of the metadata node is the kind.
3149  llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3150  GetMDInt(FileID), GetMDString(ParentName),
3151  GetMDInt(Line), GetMDInt(E.getOrder())};
3152 
3153  SourceLocation Loc;
3154  for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3156  I != E; ++I) {
3157  if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3158  I->getFirst()->getUniqueID().getFile() == FileID) {
3160  I->getFirst(), Line, 1);
3161  break;
3162  }
3163  }
3164  // Save this entry in the right position of the ordered entries array.
3165  OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3166  ParentFunctions[E.getOrder()] = ParentName;
3167 
3168  // Add metadata to the named metadata node.
3169  MD->addOperand(llvm::MDNode::get(C, Ops));
3170  };
3171 
3172  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3173  TargetRegionMetadataEmitter);
3174 
3175  // Create function that emits metadata for each device global variable entry;
3176  auto &&DeviceGlobalVarMetadataEmitter =
3177  [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3178  MD](StringRef MangledName,
3180  &E) {
3181  // Generate metadata for global variables. Each entry of this metadata
3182  // contains:
3183  // - Entry 0 -> Kind of this type of metadata (1).
3184  // - Entry 1 -> Mangled name of the variable.
3185  // - Entry 2 -> Declare target kind.
3186  // - Entry 3 -> Order the entry was created.
3187  // The first element of the metadata node is the kind.
3188  llvm::Metadata *Ops[] = {
3189  GetMDInt(E.getKind()), GetMDString(MangledName),
3190  GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3191 
3192  // Save this entry in the right position of the ordered entries array.
3193  OrderedEntries[E.getOrder()] =
3194  std::make_tuple(&E, SourceLocation(), MangledName);
3195 
3196  // Add metadata to the named metadata node.
3197  MD->addOperand(llvm::MDNode::get(C, Ops));
3198  };
3199 
3200  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3201  DeviceGlobalVarMetadataEmitter);
3202 
3203  for (const auto &E : OrderedEntries) {
3204  assert(std::get<0>(E) && "All ordered entries must exist!");
3205  if (const auto *CE =
3206  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3207  std::get<0>(E))) {
3208  if (!CE->getID() || !CE->getAddress()) {
3209  // Do not blame the entry if the parent funtion is not emitted.
3210  StringRef FnName = ParentFunctions[CE->getOrder()];
3211  if (!CGM.GetGlobalValue(FnName))
3212  continue;
3213  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3215  "Offloading entry for target region in %0 is incorrect: either the "
3216  "address or the ID is invalid.");
3217  CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3218  continue;
3219  }
3220  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3221  CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3222  } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3223  OffloadEntryInfoDeviceGlobalVar>(
3224  std::get<0>(E))) {
3225  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3226  static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3227  CE->getFlags());
3228  switch (Flags) {
3229  case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3230  if (CGM.getLangOpts().OpenMPIsDevice &&
3232  continue;
3233  if (!CE->getAddress()) {
3234  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3235  DiagnosticsEngine::Error, "Offloading entry for declare target "
3236  "variable %0 is incorrect: the "
3237  "address is invalid.");
3238  CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3239  continue;
3240  }
3241  // The vaiable has no definition - no need to add the entry.
3242  if (CE->getVarSize().isZero())
3243  continue;
3244  break;
3245  }
3246  case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3247  assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3248  (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3249  "Declaret target link address is set.");
3250  if (CGM.getLangOpts().OpenMPIsDevice)
3251  continue;
3252  if (!CE->getAddress()) {
3253  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3255  "Offloading entry for declare target variable is incorrect: the "
3256  "address is invalid.");
3257  CGM.getDiags().Report(DiagID);
3258  continue;
3259  }
3260  break;
3261  }
3262 
3263  // Hidden or internal symbols on the device are not externally visible. We
3264  // should not attempt to register them by creating an offloading entry.
3265  if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress()))
3266  if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
3267  continue;
3268 
3269  createOffloadEntry(CE->getAddress(), CE->getAddress(),
3270  CE->getVarSize().getQuantity(), Flags,
3271  CE->getLinkage());
3272  } else {
3273  llvm_unreachable("Unsupported entry kind.");
3274  }
3275  }
3276 }
3277 
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  // Only device compilations consume the host-side entry table.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to load if no host IR file was passed on the command line.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host bitcode into a throwaway context; only the named metadata
  // is read from it.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Reads operand Idx of the node as an integer constant.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Reads operand Idx of the node as a string.
    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 encodes the offload entry kind.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}
3346 
/// Lazily builds and caches the kmp_routine_entry_t function pointer type,
/// i.e. typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *).
/// Both the clang QualType and the converted LLVM type are cached on the
/// runtime object, so repeated calls are no-ops.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}
3358 
namespace {
/// Describes one privatized variable of a task-based directive: the original
/// variable, the private copy generated for the task, and (for firstprivate
/// copies) the helper variable used to emit the per-element initializer.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Constructor for local (untied-task) privates: only \p Original is set.
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  // Reference expression for the original variable; null for local privates.
  const Expr *OriginalRef = nullptr;
  // The variable as written in the source.
  const VarDecl *Original = nullptr;
  // The task-private copy of the variable; null for local privates.
  const VarDecl *PrivateCopy = nullptr;
  // Initialization helper for firstprivate copies; null otherwise.
  const VarDecl *PrivateElemInit = nullptr;
  /// True when this entry describes a privatized local variable rather than
  /// one coming from an explicit data-sharing clause (only Original is set).
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// Pair of required alignment and the private-variable description.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
3376 
3377 static bool isAllocatableDecl(const VarDecl *VD) {
3378  const VarDecl *CVD = VD->getCanonicalDecl();
3379  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3380  return false;
3381  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3382  // Use the default allocation.
3383  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3384  !AA->getAllocator());
3385 }
3386 
/// Builds the implicit record ".kmp_privates.t" with one field per privatized
/// variable of a task, or returns null when there is nothing to privatize.
static RecordDecl *
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    // /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        // Allocatable locals are accessed through the allocator, so store an
        // extra level of indirection as well.
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      // Copy alignment attributes over so the field keeps the variable's
      // required alignment.
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
                                                 E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}
3420 
/// Builds the implicit "kmp_task_t" record mirroring the runtime's task
/// descriptor, together with the "kmp_cmplrdata_t" union used for the
/// destructor thunk / priority. Taskloop directives get the additional
/// bounds/stride/last-iteration/reductions trailing fields.
static RecordDecl *
                        QualType KmpInt32Ty,
                        QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a kmp_int32 (priority) and a routine entry
  // pointer (destructor thunk).
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
    // Taskloop-only trailing fields: lb, ub, st, liter, reductions.
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}
3466 
/// Builds the implicit "kmp_task_t_with_privates" record: the runtime task
/// descriptor followed by an optional record holding the privatized
/// variables (omitted when there are none).
static RecordDecl *
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // The privates field only exists when there is something to privatize.
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
3483 
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st,
///   tt->task_data.liter, tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Parameters match kmp_routine_entry_t: (gtid, task descriptor).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 0 of the wrapper record is the kmp_task_t descriptor itself.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address so the outlined function can update it.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // Field 1 of the wrapper (if present) holds the privatized variables.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    // No privates record: pass a null pointer instead.
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
    // Taskloop entries additionally receive the bounds, stride,
    // last-iteration flag and reduction data from the task descriptor.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always reports success (0) back to the runtime.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
3598 
/// Emits the internal ".omp_task_destructor." helper the runtime invokes to
/// run destructors of a task's privatized variables. Its signature follows
/// kmp_routine_entry_t: (kmp_int32 gtid, kmp_task_t_with_privates *tt).
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Field 1 of the wrapper record holds the privatized variables.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  // Push a destroy cleanup for every private field with a nontrivial
  // destruction kind; FinishFunction emits them in reverse order.
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3647 
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: pointer to the record of privatized variables.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized variable to its out-parameter position so the record
  // fields can be matched back to the parameters below.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    // References and allocatable locals are stored through an extra level of
    // indirection (mirrors createPrivatesRecordDecl).
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // The helper is trivial; make sure it can be inlined in optimized builds.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  // Store each privates-record field address into the matching out-parameter.
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
3756 
/// Emit initialization for private variables in task-based directives.
/// Initializes each private copy inside the task descriptor; firstprivate
/// sources are read from the captured shareds block, or from the source task
/// when duplicating for a taskloop (\p ForDup).
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Field 1 of the wrapper record holds the privatized variables.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
          ? OMPD_taskloop
          : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
        CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In duplication mode only non-trivial constructor inits must be re-run;
    // trivial firstprivate copies are handled by the runtime memcpy.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read from the source task's shareds, realigned to the declared
          // alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize the init helper to point at
          // the shared value, then run the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private: just run the default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3876 
/// Check if duplication function is required for taskloops.
/// Returns true when at least one private copy has a non-trivial
/// constructor-based initializer, i.e. the task_dup helper must run real
/// constructors instead of relying on a plain memcpy of firstprivates.
                       ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    // Local privates are never initialized here.
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    // One non-trivial init is enough; stop scanning.
    if (InitRequired)
      break;
  }
  return InitRequired;
}
3893 
3894 
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Parameters: destination task, source task, lastprivate flag.
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // Only firstprivates need the source task's shareds block.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        Base, *std::next(KmpTaskTQTyRD->field_begin(),
                         KmpTaskTShareds)),
        Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Reuse the common private-initialization logic in duplication mode.
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3973 
3974 /// Checks if destructor function is required to be generated.
3975 /// \return true if cleanups are required, false otherwise.
3976 static bool
3977 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3978  ArrayRef<PrivateDataTy> Privates) {
3979  for (const PrivateDataTy &P : Privates) {
3980  if (P.second.isLocalPrivate())
3981  continue;
3982  QualType Ty = P.second.Original->getType().getNonReferenceType();
3983  if (Ty.isDestructedType())
3984  return true;
3985  }
3986  return false;
3987 }
3988 
namespace {
/// Loop generator for OpenMP iterator expression.
/// On construction it privatizes the iterator/counter variables and emits
/// the loop headers ("iter.cont" blocks with the bound checks); on
/// destruction it emits the counter increments, back-branches and
/// "iter.exit" blocks in reverse order. Code emitted while the scope is
/// alive therefore lands inside the (possibly nested) loop bodies.
class OMPIteratorGeneratorScope final
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    // Null iterator expression means no loops are generated at all.
    if (!E)
      return;
    // Create private storage for every iterator variable and its counter.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    // Emit one loop header per iterator, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signed vs. unsigned comparison depends on the counter type.
      llvm::Value *Cmp =
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loops innermost-first, mirroring construction order.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4064 
/// Returns the base address and the size in bytes of the storage denoted by
/// expression \p E. Handles OpenMP array-shaping expressions and array
/// sections as well as plain lvalues.
static std::pair<llvm::Value *, llvm::Value *>
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    // For an array-shaping expression the base pointer itself is the address.
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = element size multiplied by all shaping dimensions.
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // Size = address one past the last section element minus the base.
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    // Plain lvalue: just the size of its type.
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
4100 
/// Builds kmp_task_affinity_info_t, if it is not built yet, and returns it via
/// \p KmpTaskAffinityInfoTy. The flags member is a 32-bit unsigned integer.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    // struct kmp_task_affinity_info_t {
    //   intptr_t base_addr; size_t len; uint32_t flags;
    // };
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
4115 
// Emits the common task-allocation sequence shared by 'task', 'taskloop' and
// target-based directives: builds the kmp_task_t(+privates) record for this
// directive, emits the proxy entry / privates-map / destructor / dup helper
// functions, calls __kmpc_omp_task_alloc (or the target variant), and fills
// the resulting task descriptor (shareds copy, private inits, destructors,
// priority, detach event, affinity registration).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the initializer variable.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Sort descending by alignment to minimize padding in the privates record;
  // stable_sort keeps the declaration order among equally-aligned privates.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // larger record (extra lb/ub/st/liter fields), cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' flag may only be known at run time; if so, select it
  // dynamically and OR in the statically-known flags.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        // Iterator modifier: the element count is the product of all
        // iterator upper bounds, known only at run time.
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime count: emit a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time count: a plain constant-sized array suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-driven clauses append at a runtime position; track it in a
      // temp initialized past the statically-filled prefix.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops may duplicate the task descriptor per chunk; build the dup
    // helper when lastprivates or nontrivial inits must be replayed.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4503 
namespace {
/// Dependence kind for RTL.
/// NOTE(review): values appear to mirror the flag encoding of kmp_depend_info
/// in the OpenMP runtime's kmp.h — confirm against the runtime before editing.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4515 
4516 /// Translates internal dependency kind into the runtime kind.
4517 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4518  RTLDependenceKindTy DepKind;
4519  switch (K) {
4520  case OMPC_DEPEND_in:
4521  DepKind = DepIn;
4522  break;
4523  // Out and InOut dependencies must use the same code.
4524  case OMPC_DEPEND_out:
4525  case OMPC_DEPEND_inout:
4526  DepKind = DepInOut;
4527  break;
4528  case OMPC_DEPEND_mutexinoutset:
4529  DepKind = DepMutexInOutSet;
4530  break;
4531  case OMPC_DEPEND_inoutset:
4532  DepKind = DepInOutSet;
4533  break;
4534  case OMPC_DEPEND_source:
4535  case OMPC_DEPEND_sink:
4536  case OMPC_DEPEND_depobj:
4537  case OMPC_DEPEND_unknown:
4538  llvm_unreachable("Unknown task dependence type");
4539  }
4540  return DepKind;
4541 }
4542 
4543 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4544 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4545  QualType &FlagsTy) {
4546  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4547  if (KmpDependInfoTy.isNull()) {
4548  RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4549  KmpDependInfoRD->startDefinition();
4550  addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4551  addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4552  addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4553  KmpDependInfoRD->completeDefinition();
4554  KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4555  }
4556 }
4557 
// Returns the number of dependence records stored in the depobj variable
// \p DepobjLVal together with an lvalue for the first record of its array.
// The element count is kept in the base_addr field of the extra record placed
// immediately *before* the array (index -1), as written by
// emitDepobjDependClause.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  // Step one record back: deps[-1].base_addr holds the element count.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4583 
/// Fills kmp_depend_info records in \p DependenciesArray for every dependence
/// expression of clause data \p Data, starting at position \p Pos. \p Pos is
/// either a compile-time counter (unsigned*) advanced in place, or a runtime
/// counter lvalue when the number of emitted records is only known at run
/// time (clauses with iterator modifiers).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If the clause has an iterator modifier, open the generated iterator
  // loops; all stores below are emitted inside them.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Position known at compile time.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Position tracked in memory; load the current index.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the output position (in place or in memory).
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4640 
// Returns, for each depobj expression in \p Data, an llvm::Value holding the
// number of dependence records stored in that depobj (summed across iterator
// iterations when the clause has an iterator modifier).
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    // Accumulate per-expression counts into memory temps while inside the
    // iterator-generated loops (if any).
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Load the accumulated counts after the iterator scope has closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
4678 
// Copies the dependence records of already-constructed depobj objects into
// \p DependenciesArray at the runtime position held in \p PosLVal, advancing
// that position by the number of records copied.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // Copies happen inside the iterator-generated loops (if any).
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
4715 
// Builds the flat kmp_depend_info array for all 'depend' clauses of a task:
// sizes the array (constant, or VLA when depobj/iterator clauses make the
// count runtime-only), fills plain dependencies first, then iterator-driven
// ones, then copies in depobj arrays. Returns the element count (i32) and
// the array address, or {nullptr, invalid} when there are no dependencies.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Count only plain, non-iterator, non-depobj dependencies statically.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // deps-per-clause * product of all iterator trip counts.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total count is runtime-only: emit a VLA of kmp_depend_info.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Count is known at compile time: a constant-sized array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Fill plain (non-depobj, non-iterator) dependencies first, tracking the
  // write position statically.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4839 
// Emits the codegen for an OpenMP 'depobj' depend clause: heap-allocates a
// kmp_depend_info array via __kmpc_alloc, stores the dependency count in the
// extra element 0, fills elements [1..N] with the dependencies, and returns a
// void* Address pointing just past element 0 (the depobj handle callers use).
// NOTE(review): this listing is a doc-site export; the signature line
// (original line 4840) and original line 4896 were hyperlinked identifiers
// and are missing from the text — confirm against the upstream file.
4841  CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4842  SourceLocation Loc) {
// An empty dependency list needs no array at all.
4843  if (Dependencies.DepExprs.empty())
4844  return Address::invalid();
4845  // Process list of dependencies.
4846  ASTContext &C = CGM.getContext();
4847  Address DependenciesArray = Address::invalid();
4848  unsigned NumDependencies = Dependencies.DepExprs.size();
4849  QualType FlagsTy;
4850  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4851  RecordDecl *KmpDependInfoRD =
4852  cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4853 
4854  llvm::Value *Size;
4855  // Define type kmp_depend_info[<Dependencies.size()>];
4856  // For depobj reserve one extra element to store the number of elements.
4857  // It is required to handle depobj(x) update(in) construct.
4858  // kmp_depend_info[<Dependencies.size()>] deps;
4859  llvm::Value *NumDepsVal;
4860  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
// With an iterator modifier the element count is only known at runtime:
// multiply the upper bounds of all iterators to get the total count.
4861  if (const auto *IE =
4862  cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4863  NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4864  for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4865  llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4866  Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4867  NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4868  }
// +1 for the reserved counter element, then scale by the (aligned)
// per-record size to get the allocation size in bytes.
4869  Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4870  NumDepsVal);
4871  CharUnits SizeInBytes =
4872  C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4873  llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4874  Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4875  NumDepsVal =
4876  CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4877  } else {
// Constant dependency count: size the array statically (again +1 for the
// counter element) and let the AST compute the byte size.
4878  QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4879  KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4880  nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4881  CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4882  Size = CGM.getSize(Sz.alignTo(Align));
4883  NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4884  }
4885  // Need to allocate on the dynamic memory.
4886  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4887  // Use default allocator.
4888  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4889  llvm::Value *Args[] = {ThreadID, Size, Allocator};
4890 
// Runtime call: void *__kmpc_alloc(gtid, size, allocator).
4891  llvm::Value *Addr =
4892  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4893  CGM.getModule(), OMPRTL___kmpc_alloc),
4894  Args, ".dep.arr.addr");
4895  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
// NOTE(review): original line 4896 (the pointer-cast call whose arguments
// follow on 4897) is missing from this listing.
4897  Addr, KmpDependInfoLlvmTy->getPointerTo());
4898  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4899  // Write number of elements in the first element of array for depobj.
4900  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4901  // deps[i].base_addr = NumDependencies;
// The base_addr field of element 0 is repurposed to hold the element count
// (needed later by depobj update/destroy, which read it back).
4902  LValue BaseAddrLVal = CGF.EmitLValueForField(
4903  Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4904  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
// Position cursor for emitDependData: a runtime counter lvalue when the
// count is dynamic (iterator case), otherwise a simple compile-time index.
// Either way, filling starts at element 1 (element 0 holds the count).
4905  llvm::PointerUnion<unsigned *, LValue *> Pos;
4906  unsigned Idx = 1;
4907  LValue PosLVal;
4908  if (Dependencies.IteratorExpr) {
4909  PosLVal = CGF.MakeAddrLValue(
4910  CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4911  C.getSizeType());
4912  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4913  /*IsInit=*/true);
4914  Pos = &PosLVal;
4915  } else {
4916  Pos = &Idx;
4917  }
4918  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
// Return a void* pointing past the counter element, i.e. at the first real
// dependency record — that is the address stored in the depobj variable.
4919  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4920  CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4921  CGF.Int8Ty);
4922  return DependenciesArray;
4923 }
4924 
4926  SourceLocation Loc) {
4927  ASTContext &C = CGM.getContext();
4928  QualType FlagsTy;
4929  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4931  DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4932  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4934  Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4935  CGF.ConvertTypeForMem(KmpDependInfoTy));
4936  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4937  Addr.getElementType(), Addr.getPointer(),
4938  llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4939  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4940  CGF.VoidPtrTy);
4941  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4942  // Use default allocator.
4943  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4944  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4945 
4946  // _kmpc_free(gtid, addr, nullptr);
4947  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4948  CGM.getModule(), OMPRTL___kmpc_free),
4949  Args);
4950 }
4951 
4953  OpenMPDependClauseKind NewDepKind,
4954  SourceLocation Loc) {
4955  ASTContext &C = CGM.getContext();
4956  QualType FlagsTy;
4957  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4958  RecordDecl *KmpDependInfoRD =
4959  cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4960  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4961  llvm::Value *NumDeps;
4962  LValue Base;
4963  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4964 
4965  Address Begin = Base.getAddress(CGF);
4966  // Cast from pointer to array type to pointer to single element.
4967  llvm::Value *End = CGF.Builder.CreateGEP(
4968  Begin.getElementType(), Begin.getPointer(), NumDeps);
4969  // The basic structure here is a while-do loop.