clang  16.0.0git
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGOpenMPRuntime.h"
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/APValue.h"
20 #include "clang/AST/Attr.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/AST/OpenMPClause.h"
23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
30 #include "llvm/ADT/ArrayRef.h"
31 #include "llvm/ADT/SetOperations.h"
32 #include "llvm/ADT/SmallBitVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/Bitcode/BitcodeReader.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Value.h"
40 #include "llvm/Support/AtomicOrdering.h"
41 #include "llvm/Support/Format.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include <cassert>
44 #include <numeric>
45 
46 using namespace clang;
47 using namespace CodeGen;
48 using namespace llvm::omp;
49 
50 namespace {
51 /// Base class for handling code generation inside OpenMP regions.
// NOTE(review): this extraction dropped original source lines 70, 75-76 and
// 109. The initializer lists below reference `CodeGen` and `Kind`, so the
// missing lines appear to be the corresponding constructor parameters and the
// `Kind` data member -- restore from the upstream file before compiling.
52 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
53 public:
54  /// Kinds of OpenMP regions used in codegen.
55  enum CGOpenMPRegionKind {
56  /// Region with outlined function for standalone 'parallel'
57  /// directive.
58  ParallelOutlinedRegion,
59  /// Region with outlined function for standalone 'task' directive.
60  TaskOutlinedRegion,
61  /// Region for constructs that do not require function outlining,
62  /// like 'for', 'sections', 'atomic' etc. directives.
63  InlinedRegion,
64  /// Region with outlined function for standalone 'target' directive.
65  TargetRegion,
66  };
67 
68  CGOpenMPRegionInfo(const CapturedStmt &CS,
69  const CGOpenMPRegionKind RegionKind,
// NOTE(review): parameter line(s) missing here (original line 70).
71  bool HasCancel)
72  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
73  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
74 
75  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
// NOTE(review): parameter line(s) missing here (original lines 75-76 range).
77  bool HasCancel)
78  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
79  Kind(Kind), HasCancel(HasCancel) {}
80 
81  /// Get a variable or parameter for storing global thread id
82  /// inside OpenMP construct.
83  virtual const VarDecl *getThreadIDVariable() const = 0;
84 
85  /// Emit the captured statement body.
86  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
87 
88  /// Get an LValue for the current ThreadID variable.
89  /// \return LValue for thread id variable. This LValue always has type int32*.
90  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
91 
// No-op by default; overridden by task regions that support untied tasks.
92  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
93 
94  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
95 
96  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
97 
98  bool hasCancel() const { return HasCancel; }
99 
// LLVM-style RTTI support: any CR_OpenMP captured-stmt info is one of us.
100  static bool classof(const CGCapturedStmtInfo *Info) {
101  return Info->getKind() == CR_OpenMP;
102  }
103 
104  ~CGOpenMPRegionInfo() override = default;
105 
106 protected:
107  CGOpenMPRegionKind RegionKind;
108  RegionCodeGenTy CodeGen;
// NOTE(review): member declaration missing here (original line 109; `Kind`
// is read by getDirectiveKind() above, so an OpenMPDirectiveKind member
// presumably belongs here -- confirm against upstream).
110  bool HasCancel;
111 };
112 
113 /// API for captured statement code generation in OpenMP constructs.
114 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
115 public:
116  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
117  const RegionCodeGenTy &CodeGen,
118  OpenMPDirectiveKind Kind, bool HasCancel,
119  StringRef HelperName)
120  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
121  HasCancel),
122  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
123  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
124  }
125 
126  /// Get a variable or parameter for storing global thread id
127  /// inside OpenMP construct.
128  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
129 
130  /// Get the name of the capture helper.
131  StringRef getHelperName() const override { return HelperName; }
132 
133  static bool classof(const CGCapturedStmtInfo *Info) {
134  return CGOpenMPRegionInfo::classof(Info) &&
135  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
136  ParallelOutlinedRegion;
137  }
138 
139 private:
140  /// A variable or parameter storing global thread id for OpenMP
141  /// constructs.
142  const VarDecl *ThreadIDVar;
143  StringRef HelperName;
144 };
145 
146 /// API for captured statement code generation in OpenMP constructs.
// NOTE(review): original source line 170 was dropped in extraction (see the
// gap marked below inside Enter()).
147 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
148 public:
// Pre/post action that implements the task-switching machinery for untied
// tasks: each re-entry point gets a case in a switch over the part id.
149  class UntiedTaskActionTy final : public PrePostActionTy {
150  bool Untied;
151  const VarDecl *PartIDVar;
152  const RegionCodeGenTy UntiedCodeGen;
153  llvm::SwitchInst *UntiedSwitch = nullptr;
154 
155  public:
156  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
157  const RegionCodeGenTy &UntiedCodeGen)
158  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
159  void Enter(CodeGenFunction &CGF) override {
160  if (Untied) {
161  // Emit task switching point.
162  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
163  CGF.GetAddrOfLocalVar(PartIDVar),
164  PartIDVar->getType()->castAs<PointerType>());
165  llvm::Value *Res =
166  CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
167  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
168  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
169  CGF.EmitBlock(DoneBB);
// NOTE(review): a statement is missing here (original line 170) -- likely a
// branch to the return block, cf. the analogous sequence in
// emitUntiedSwitch() below; confirm against upstream.
171  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
173  CGF.Builder.GetInsertBlock());
174  emitUntiedSwitch(CGF);
175  }
176  }
// Emits one task-switching point: stores the next part id, runs the
// untied-copy codegen, jumps out, and registers the resume block as a new
// case of the switch.
177  void emitUntiedSwitch(CodeGenFunction &CGF) const {
178  if (Untied) {
179  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
180  CGF.GetAddrOfLocalVar(PartIDVar),
181  PartIDVar->getType()->castAs<PointerType>());
182  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183  PartIdLVal);
184  UntiedCodeGen(CGF);
185  CodeGenFunction::JumpDest CurPoint =
186  CGF.getJumpDestInCurrentScope(".untied.next.");
187  CGF.EmitBranch(CGF.ReturnBlock.getBlock());
188  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
189  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
190  CGF.Builder.GetInsertBlock());
191  CGF.EmitBranchThroughCleanup(CurPoint);
192  CGF.EmitBlock(CurPoint.getBlock());
193  }
194  }
195  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
196  };
197  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198  const VarDecl *ThreadIDVar,
199  const RegionCodeGenTy &CodeGen,
200  OpenMPDirectiveKind Kind, bool HasCancel,
201  const UntiedTaskActionTy &Action)
202  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203  ThreadIDVar(ThreadIDVar), Action(Action) {
204  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
205  }
206 
207  /// Get a variable or parameter for storing global thread id
208  /// inside OpenMP construct.
209  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
210 
211  /// Get an LValue for the current ThreadID variable.
212  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
213 
214  /// Get the name of the capture helper.
215  StringRef getHelperName() const override { return ".omp_outlined."; }
216 
// Delegates task switching to the shared action object.
217  void emitUntiedSwitch(CodeGenFunction &CGF) override {
218  Action.emitUntiedSwitch(CGF);
219  }
220 
221  static bool classof(const CGCapturedStmtInfo *Info) {
222  return CGOpenMPRegionInfo::classof(Info) &&
223  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224  TaskOutlinedRegion;
225  }
226 
227 private:
228  /// A variable or parameter storing global thread id for OpenMP
229  /// constructs.
230  const VarDecl *ThreadIDVar;
231  /// Action for emitting code for untied tasks.
232  const UntiedTaskActionTy &Action;
233 };
234 
235 /// API for inlined captured statement code generation in OpenMP
236 /// constructs.
// NOTE(review): original source line 314 was dropped in extraction -- the
// declaration of the `OldCSI` member returned by getOldCSI() below;
// restore from upstream before compiling.
237 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238 public:
239  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240  const RegionCodeGenTy &CodeGen,
241  OpenMPDirectiveKind Kind, bool HasCancel)
242  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243  OldCSI(OldCSI),
244  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
245 
246  // Retrieve the value of the context parameter.
247  llvm::Value *getContextValue() const override {
248  if (OuterRegionInfo)
249  return OuterRegionInfo->getContextValue();
250  llvm_unreachable("No context value for inlined OpenMP region");
251  }
252 
// All queries below delegate to the enclosing outlined region when present;
// an inlined region has no captured-statement state of its own.
253  void setContextValue(llvm::Value *V) override {
254  if (OuterRegionInfo) {
255  OuterRegionInfo->setContextValue(V);
256  return;
257  }
258  llvm_unreachable("No context value for inlined OpenMP region");
259  }
260 
261  /// Lookup the captured field decl for a variable.
262  const FieldDecl *lookup(const VarDecl *VD) const override {
263  if (OuterRegionInfo)
264  return OuterRegionInfo->lookup(VD);
265  // If there is no outer outlined region,no need to lookup in a list of
266  // captured variables, we can use the original one.
267  return nullptr;
268  }
269 
270  FieldDecl *getThisFieldDecl() const override {
271  if (OuterRegionInfo)
272  return OuterRegionInfo->getThisFieldDecl();
273  return nullptr;
274  }
275 
276  /// Get a variable or parameter for storing global thread id
277  /// inside OpenMP construct.
278  const VarDecl *getThreadIDVariable() const override {
279  if (OuterRegionInfo)
280  return OuterRegionInfo->getThreadIDVariable();
281  return nullptr;
282  }
283 
284  /// Get an LValue for the current ThreadID variable.
285  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286  if (OuterRegionInfo)
287  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288  llvm_unreachable("No LValue for inlined OpenMP construct");
289  }
290 
291  /// Get the name of the capture helper.
292  StringRef getHelperName() const override {
293  if (auto *OuterRegionInfo = getOldCSI())
294  return OuterRegionInfo->getHelperName();
295  llvm_unreachable("No helper name for inlined OpenMP construct");
296  }
297 
298  void emitUntiedSwitch(CodeGenFunction &CGF) override {
299  if (OuterRegionInfo)
300  OuterRegionInfo->emitUntiedSwitch(CGF);
301  }
302 
303  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
304 
305  static bool classof(const CGCapturedStmtInfo *Info) {
306  return CGOpenMPRegionInfo::classof(Info) &&
307  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
308  }
309 
310  ~CGOpenMPInlinedRegionInfo() override = default;
311 
312 private:
313  /// CodeGen info about outer OpenMP region.
// NOTE(review): the `OldCSI` member declaration is missing here (original
// line 314 dropped in extraction).
315  CGOpenMPRegionInfo *OuterRegionInfo;
316 };
317 
318 /// API for captured statement code generation in OpenMP target
319 /// constructs. For this captures, implicit parameters are used instead of the
320 /// captured fields. The name of the target region has to be unique in a given
321 /// application so it is provided by the client, because only the client has
322 /// the information to generate that.
323 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324 public:
325  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326  const RegionCodeGenTy &CodeGen, StringRef HelperName)
327  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328  /*HasCancel=*/false),
329  HelperName(HelperName) {}
330 
331  /// This is unused for target regions because each starts executing
332  /// with a single thread.
333  const VarDecl *getThreadIDVariable() const override { return nullptr; }
334 
335  /// Get the name of the capture helper.
336  StringRef getHelperName() const override { return HelperName; }
337 
338  static bool classof(const CGCapturedStmtInfo *Info) {
339  return CGOpenMPRegionInfo::classof(Info) &&
340  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
341  }
342 
343 private:
344  StringRef HelperName;
345 };
346 
// Placeholder region-codegen callback used where no body codegen is valid
// (e.g. CGOpenMPInnerExprInfo below); it must never actually be invoked.
347 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
348  llvm_unreachable("No codegen for expressions");
349 }
350 /// API for generation of expressions captured in a innermost OpenMP
351 /// region.
// NOTE(review): original source lines 372 and 406 were dropped in extraction
// (see the gap markers below); restore from upstream before compiling.
352 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353 public:
354  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356  OMPD_unknown,
357  /*HasCancel=*/false),
358  PrivScope(CGF) {
359  // Make sure the globals captured in the provided statement are local by
360  // using the privatization logic. We assume the same variable is not
361  // captured more than once.
362  for (const auto &C : CS.captures()) {
363  if (!C.capturesVariable() && !C.capturesVariableByCopy())
364  continue;
365 
366  const VarDecl *VD = C.getCapturedVar();
367  if (VD->isLocalVarDeclOrParm())
368  continue;
369 
370  DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371  /*RefersToEnclosingVariableOrCapture=*/false,
// NOTE(review): a DeclRefExpr constructor argument line is missing here
// (original line 372) -- presumably the type/value-kind arguments.
373  C.getLocation());
374  PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
375  }
376  (void)PrivScope.Privatize();
377  }
378 
379  /// Lookup the captured field decl for a variable.
380  const FieldDecl *lookup(const VarDecl *VD) const override {
381  if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382  return FD;
383  return nullptr;
384  }
385 
386  /// Emit the captured statement body.
387  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388  llvm_unreachable("No body for expressions");
389  }
390 
391  /// Get a variable or parameter for storing global thread id
392  /// inside OpenMP construct.
393  const VarDecl *getThreadIDVariable() const override {
394  llvm_unreachable("No thread id for expressions");
395  }
396 
397  /// Get the name of the capture helper.
398  StringRef getHelperName() const override {
399  llvm_unreachable("No helper name for expressions");
400  }
401 
402  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
403 
404 private:
405  /// Private scope to capture global variables.
// NOTE(review): the `PrivScope` member declaration is missing here
// (original line 406 dropped in extraction); it is initialized and used in
// the constructor above.
407 };
408 
409 /// RAII for emitting code of OpenMP constructs.
410 class InlinedOpenMPRegionRAII {
411  CodeGenFunction &CGF;
412  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
413  FieldDecl *LambdaThisCaptureField = nullptr;
414  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
415  bool NoInheritance = false;
416 
417 public:
418  /// Constructs region for combined constructs.
419  /// \param CodeGen Code generation sequence for combined directives. Includes
420  /// a list of functions used for code generation of implicitly inlined
421  /// regions.
422  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423  OpenMPDirectiveKind Kind, bool HasCancel,
424  bool NoInheritance = true)
425  : CGF(CGF), NoInheritance(NoInheritance) {
426  // Start emission for the construct.
427  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
428  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
429  if (NoInheritance) {
430  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
431  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
432  CGF.LambdaThisCaptureField = nullptr;
433  BlockInfo = CGF.BlockInfo;
434  CGF.BlockInfo = nullptr;
435  }
436  }
437 
438  ~InlinedOpenMPRegionRAII() {
439  // Restore original CapturedStmtInfo only if we're done with code emission.
440  auto *OldCSI =
441  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
442  delete CGF.CapturedStmtInfo;
443  CGF.CapturedStmtInfo = OldCSI;
444  if (NoInheritance) {
445  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
446  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
447  CGF.BlockInfo = BlockInfo;
448  }
449  }
450 };
451 
452 /// Values for bit flags used in the ident_t to describe the fields.
453 /// All enumeric elements are named and described in accordance with the code
454 /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
455 enum OpenMPLocationFlags : unsigned {
456  /// Use trampoline for internal microtask.
457  OMP_IDENT_IMD = 0x01,
458  /// Use c-style ident structure.
459  OMP_IDENT_KMPC = 0x02,
460  /// Atomic reduction option for kmpc_reduce.
461  OMP_ATOMIC_REDUCE = 0x10,
462  /// Explicit 'barrier' directive.
463  OMP_IDENT_BARRIER_EXPL = 0x20,
464  /// Implicit barrier in code.
465  OMP_IDENT_BARRIER_IMPL = 0x40,
466  /// Implicit barrier in 'for' directive.
// Note: deliberately the same value (0x40) as OMP_IDENT_BARRIER_IMPL.
467  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
468  /// Implicit barrier in 'sections' directive.
469  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
470  /// Implicit barrier in 'single' directive.
471  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
472  /// Call of __kmp_for_static_init for static loop.
473  OMP_IDENT_WORK_LOOP = 0x200,
474  /// Call of __kmp_for_static_init for sections.
475  OMP_IDENT_WORK_SECTIONS = 0x400,
476  /// Call of __kmp_for_static_init for distribute.
477  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
478  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
479 };
480 
481 namespace {
// NOTE(review): the enum declaration headers were dropped in extraction
// (original lines 482, 484 and 500) -- both enumerator lists below are
// missing their `enum ... {` lines; restore from upstream before compiling.
483 /// Values for bit flags for marking which requires clauses have been used.
485  /// flag undefined.
486  OMP_REQ_UNDEFINED = 0x000,
487  /// no requires clause present.
488  OMP_REQ_NONE = 0x001,
489  /// reverse_offload clause.
490  OMP_REQ_REVERSE_OFFLOAD = 0x002,
491  /// unified_address clause.
492  OMP_REQ_UNIFIED_ADDRESS = 0x004,
493  /// unified_shared_memory clause.
494  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
495  /// dynamic_allocators clause.
496  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
497  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
498 };
499 
501  /// Device ID if the device was not defined, runtime should get it
502  /// from environment variables in the spec.
503  OMP_DEVICEID_UNDEF = -1,
504 };
505 } // anonymous namespace
506 
507 /// Describes ident structure that describes a source location.
508 /// All descriptions are taken from
509 /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
510 /// Original structure:
511 /// typedef struct ident {
512 /// kmp_int32 reserved_1; /**< might be used in Fortran;
513 /// see above */
514 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
515 /// KMP_IDENT_KMPC identifies this union
516 /// member */
517 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
518 /// see above */
519 ///#if USE_ITT_BUILD
520 /// /* but currently used for storing
521 /// region-specific ITT */
522 /// /* contextual information. */
523 ///#endif /* USE_ITT_BUILD */
524 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
525 /// C++ */
526 /// char const *psource; /**< String describing the source location.
527 /// The string is composed of semi-colon separated
528 /// fields which describe the source file,
529 /// the function and a pair of line numbers that
530 /// delimit the construct.
531 /// */
532 /// } ident_t;
// Field indices used with GEP when building/reading an ident_t value.
533 enum IdentFieldIndex {
534  /// might be used in Fortran
535  IdentField_Reserved_1,
536  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
537  IdentField_Flags,
538  /// Not really used in Fortran any more
539  IdentField_Reserved_2,
540  /// Source[4] in Fortran, do not use for C++
541  IdentField_Reserved_3,
542  /// String describing the source location. The string is composed of
543  /// semi-colon separated fields which describe the source file, the function
544  /// and a pair of line numbers that delimit the construct.
545  IdentField_PSource
546 };
547 
548 /// Schedule types for 'omp for' loops (these enumerators are taken from
549 /// the enum sched_type in kmp.h).
550 enum OpenMPSchedType {
551  /// Lower bound for default (unordered) versions.
552  OMP_sch_lower = 32,
553  OMP_sch_static_chunked = 33,
554  OMP_sch_static = 34,
555  OMP_sch_dynamic_chunked = 35,
556  OMP_sch_guided_chunked = 36,
557  OMP_sch_runtime = 37,
558  OMP_sch_auto = 38,
559  /// static with chunk adjustment (e.g., simd)
560  OMP_sch_static_balanced_chunked = 45,
561  /// Lower bound for 'ordered' versions.
// Each ordered value below is its unordered counterpart plus 32.
562  OMP_ord_lower = 64,
563  OMP_ord_static_chunked = 65,
564  OMP_ord_static = 66,
565  OMP_ord_dynamic_chunked = 67,
566  OMP_ord_guided_chunked = 68,
567  OMP_ord_runtime = 69,
568  OMP_ord_auto = 70,
569  OMP_sch_default = OMP_sch_static,
570  /// dist_schedule types
571  OMP_dist_sch_static_chunked = 91,
572  OMP_dist_sch_static = 92,
573  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
574  /// Set if the monotonic schedule modifier was present.
// The two modifiers below are high-bit flags combined with the values above.
575  OMP_sch_modifier_monotonic = (1 << 29),
576  /// Set if the nonmonotonic schedule modifier was present.
577  OMP_sch_modifier_nonmonotonic = (1 << 30),
578 };
579 
580 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581 /// region.
582 class CleanupTy final : public EHScopeStack::Cleanup {
583  PrePostActionTy *Action;
584 
585 public:
586  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588  if (!CGF.HaveInsertPoint())
589  return;
590  Action->Exit(CGF);
591  }
592 };
593 
594 } // anonymous namespace
595 
// NOTE(review): the function header is missing here (original lines 596-597
// dropped in extraction). Judging from the members used (CodeGen, Callback,
// PrePostAction), this is presumably RegionCodeGenTy's call operator --
// confirm against upstream. If an explicit pre/post action was provided,
// its Exit is also registered as an EH/normal cleanup.
598  if (PrePostAction) {
599  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600  Callback(CodeGen, CGF, *PrePostAction);
601  } else {
602  PrePostActionTy Action;
603  Callback(CodeGen, CGF, Action);
604  }
605 }
606 
607 /// Check if the combiner is a call to UDR combiner and if it is so return the
608 /// UDR decl used for reduction.
609 static const OMPDeclareReductionDecl *
610 getReductionInit(const Expr *ReductionOp) {
611  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613  if (const auto *DRE =
614  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615  if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616  return DRD;
617  return nullptr;
618 }
619 
// NOTE(review): the function header is missing here (original line 620
// dropped in extraction) -- the name `emitInitWithReductionInitializer` is
// used by callers later in this file; restore from upstream. Lines 627 and
// 657 inside the body were also dropped (marked below).
621  const OMPDeclareReductionDecl *DRD,
622  const Expr *InitOp,
623  Address Private, Address Original,
624  QualType Ty) {
// A user-provided initializer: privatize the LHS/RHS of the init op and
// evaluate it with the reduction function mapped over the callee.
625  if (DRD->getInitializer()) {
626  std::pair<llvm::Function *, llvm::Function *> Reduction =
// NOTE(review): the initializer of `Reduction` is missing here (original
// line 627 dropped in extraction).
628  const auto *CE = cast<CallExpr>(InitOp);
629  const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
630  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
631  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
632  const auto *LHSDRE =
633  cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
634  const auto *RHSDRE =
635  cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
636  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
637  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
638  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
639  (void)PrivateScope.Privatize();
640  RValue Func = RValue::get(Reduction.second);
641  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642  CGF.EmitIgnoredExpr(InitOp);
643  } else {
// No initializer: default-initialize from a private constant null global.
644  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645  std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646  auto *GV = new llvm::GlobalVariable(
647  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648  llvm::GlobalValue::PrivateLinkage, Init, Name);
649  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650  RValue InitRVal;
651  switch (CGF.getEvaluationKind(Ty)) {
652  case TEK_Scalar:
653  InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654  break;
655  case TEK_Complex:
656  InitRVal =
// NOTE(review): the complex-load initializer expression is missing here
// (original line 657 dropped in extraction).
658  break;
659  case TEK_Aggregate: {
660  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663  /*IsInitializer=*/false);
// Aggregates are stored directly; nothing further to do.
664  return;
665  }
666  }
667  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670  /*IsInitializer=*/false);
671  }
672 }
673 
674 /// Emit initialization of arrays of complex types.
675 /// \param DestAddr Address of the array.
676 /// \param Type Type of array.
677 /// \param Init Initial expression of array.
678 /// \param SrcAddr Address of the original array.
// Left byte-identical: this function's correctness depends on the exact
// order of basic-block and PHI emission, so only comments are added here.
679 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
680  QualType Type, bool EmitDeclareReductionInit,
681  const Expr *Init,
682  const OMPDeclareReductionDecl *DRD,
683  Address SrcAddr = Address::invalid()) {
684  // Perform element-by-element initialization.
685  QualType ElementTy;
686 
687  // Drill down to the base element type on both arrays.
688  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
689  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
// The source array is only needed when a UDR initializer reads the original.
690  if (DRD)
691  SrcAddr =
692  CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
693 
694  llvm::Value *SrcBegin = nullptr;
695  if (DRD)
696  SrcBegin = SrcAddr.getPointer();
697  llvm::Value *DestBegin = DestAddr.getPointer();
698  // Cast from pointer to array type to pointer to single element.
699  llvm::Value *DestEnd =
700  CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
701  // The basic structure here is a while-do loop.
702  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
703  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
704  llvm::Value *IsEmpty =
705  CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
706  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
707 
708  // Enter the loop body, making that address the current address.
709  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
710  CGF.EmitBlock(BodyBB);
711 
712  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
713 
// PHIs track the current source/destination element across iterations.
714  llvm::PHINode *SrcElementPHI = nullptr;
715  Address SrcElementCurrent = Address::invalid();
716  if (DRD) {
717  SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
718  "omp.arraycpy.srcElementPast");
719  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
720  SrcElementCurrent =
721  Address(SrcElementPHI, SrcAddr.getElementType(),
722  SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
723  }
724  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
725  DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
726  DestElementPHI->addIncoming(DestBegin, EntryBB);
727  Address DestElementCurrent =
728  Address(DestElementPHI, DestAddr.getElementType(),
729  DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
730 
731  // Emit copy.
732  {
733  CodeGenFunction::RunCleanupsScope InitScope(CGF);
734  if (EmitDeclareReductionInit) {
735  emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
736  SrcElementCurrent, ElementTy);
737  } else
738  CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
739  /*IsInitializer=*/false);
740  }
741 
742  if (DRD) {
743  // Shift the address forward by one element.
// NOTE(review): the ".dest.element" label below names the *source* GEP --
// appears to be a copy-pasted label; cosmetic only.
744  llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
745  SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
746  "omp.arraycpy.dest.element");
747  SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
748  }
749 
750  // Shift the address forward by one element.
751  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
752  DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
753  "omp.arraycpy.dest.element");
754  // Check whether we've reached the end.
755  llvm::Value *Done =
756  CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
757  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
758  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
759 
760  // Done.
761  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
762 }
763 
// Thin wrapper: evaluate the shared expression as an OpenMP shared lvalue.
764 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
765  return CGF.EmitOMPSharedLValue(E);
766 }
767 
768 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769  const Expr *E) {
770  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772  return LValue();
773 }
774 
775 void ReductionCodeGen::emitAggregateInitialization(
776  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
777  const OMPDeclareReductionDecl *DRD) {
778  // Emit VarDecl with copy init for arrays.
779  // Get the address of the original variable captured in current
780  // captured region.
781  const auto *PrivateVD =
782  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
783  bool EmitDeclareReductionInit =
784  DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
785  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
786  EmitDeclareReductionInit,
787  EmitDeclareReductionInit ? ClausesData[N].ReductionOp
788  : PrivateVD->getInit(),
789  DRD, SharedAddr);
790 }
791 
// NOTE(review): the constructor header is missing here (original lines
// 792-793 dropped in extraction) -- presumably
// `ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, ...`
// given the `Shareds`/`Origs` uses below; restore from upstream.
794  ArrayRef<const Expr *> Privates,
795  ArrayRef<const Expr *> ReductionOps) {
796  ClausesData.reserve(Shareds.size());
797  SharedAddresses.reserve(Shareds.size());
798  Sizes.reserve(Shareds.size());
799  BaseDecls.reserve(Shareds.size());
// Walk the four parallel clause lists in lockstep.
800  const auto *IOrig = Origs.begin();
801  const auto *IPriv = Privates.begin();
802  const auto *IRed = ReductionOps.begin();
803  for (const Expr *Ref : Shareds) {
804  ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805  std::advance(IOrig, 1);
806  std::advance(IPriv, 1);
807  std::advance(IRed, 1);
808  }
809 }
810 
// NOTE(review): the function header is missing here (original line 811
// dropped in extraction); judging from the members written, this emits the
// shared and original lvalues for clause item N -- restore from upstream.
812  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
813  "Number of generated lvalues must be exactly N.");
814  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816  SharedAddresses.emplace_back(First, Second);
// When the shared and referenced expressions coincide, reuse the lvalues
// instead of emitting them twice.
817  if (ClausesData[N].Shared == ClausesData[N].Ref) {
818  OrigAddresses.emplace_back(First, Second);
819  } else {
820  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822  OrigAddresses.emplace_back(First, Second);
823  }
824 }
825 
// NOTE(review): the function header is missing here (original line 826
// dropped in extraction), as is the start of the OpaqueValueMapping
// declaration near the end (original line 852); restore from upstream.
827  QualType PrivateType = getPrivateType(N);
828  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
// Constant-size items: record the byte size only; no element count needed.
829  if (!PrivateType->isVariablyModifiedType()) {
830  Sizes.emplace_back(
831  CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
832  nullptr);
833  return;
834  }
835  llvm::Value *Size;
836  llvm::Value *SizeInChars;
837  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
838  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
// Array sections: element count = (UB - LB) + 1; bytes = count * sizeof.
839  if (AsArraySection) {
840  Size = CGF.Builder.CreatePtrDiff(ElemType,
841  OrigAddresses[N].second.getPointer(CGF),
842  OrigAddresses[N].first.getPointer(CGF));
843  Size = CGF.Builder.CreateNUWAdd(
844  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
845  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
846  } else {
847  SizeInChars =
848  CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
849  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
850  }
851  Sizes.emplace_back(SizeInChars, Size);
// Map the VLA size expression to the computed element count so the
// variably-modified private type can be emitted.
853  CGF,
854  cast<OpaqueValueExpr>(
855  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
856  RValue::get(Size));
857  CGF.EmitVariablyModifiedType(PrivateType);
858 }
859 
861  llvm::Value *Size) {
862  QualType PrivateType = getPrivateType(N);
863  if (!PrivateType->isVariablyModifiedType()) {
864  assert(!Size && !Sizes[N].second &&
865  "Size should be nullptr for non-variably modified reduction "
866  "items.");
867  return;
868  }
870  CGF,
871  cast<OpaqueValueExpr>(
872  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
873  RValue::get(Size));
874  CGF.EmitVariablyModifiedType(PrivateType);
875 }
876 
878  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
879  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
880  assert(SharedAddresses.size() > N && "No variable was generated");
881  const auto *PrivateVD =
882  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
883  const OMPDeclareReductionDecl *DRD =
884  getReductionInit(ClausesData[N].ReductionOp);
885  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
886  if (DRD && DRD->getInitializer())
887  (void)DefaultInit(CGF);
888  emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
889  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
890  (void)DefaultInit(CGF);
891  QualType SharedType = SharedAddresses[N].first.getType();
892  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
893  PrivateAddr, SharedAddr, SharedType);
894  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
895  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
896  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
897  PrivateVD->getType().getQualifiers(),
898  /*IsInitializer=*/false);
899  }
900 }
901 
903  QualType PrivateType = getPrivateType(N);
904  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
905  return DTorKind != QualType::DK_none;
906 }
907 
909  Address PrivateAddr) {
910  QualType PrivateType = getPrivateType(N);
911  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
912  if (needCleanups(N)) {
913  PrivateAddr = CGF.Builder.CreateElementBitCast(
914  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
915  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
916  }
917 }
918 
920  LValue BaseLV) {
921  BaseTy = BaseTy.getNonReferenceType();
922  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
923  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
924  if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
925  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
926  } else {
927  LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
928  BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
929  }
930  BaseTy = BaseTy->getPointeeType();
931  }
932  return CGF.MakeAddrLValue(
933  CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
934  CGF.ConvertTypeForMem(ElTy)),
935  BaseLV.getType(), BaseLV.getBaseInfo(),
936  CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
937 }
938 
940  Address OriginalBaseAddress, llvm::Value *Addr) {
941  Address Tmp = Address::invalid();
942  Address TopTmp = Address::invalid();
943  Address MostTopTmp = Address::invalid();
944  BaseTy = BaseTy.getNonReferenceType();
945  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
946  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
947  Tmp = CGF.CreateMemTemp(BaseTy);
948  if (TopTmp.isValid())
949  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
950  else
951  MostTopTmp = Tmp;
952  TopTmp = Tmp;
953  BaseTy = BaseTy->getPointeeType();
954  }
955 
956  if (Tmp.isValid()) {
958  Addr, Tmp.getElementType());
959  CGF.Builder.CreateStore(Addr, Tmp);
960  return MostTopTmp;
961  }
962 
964  Addr, OriginalBaseAddress.getType());
965  return OriginalBaseAddress.withPointer(Addr);
966 }
967 
968 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
969  const VarDecl *OrigVD = nullptr;
970  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
971  const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
972  while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
973  Base = TempOASE->getBase()->IgnoreParenImpCasts();
974  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975  Base = TempASE->getBase()->IgnoreParenImpCasts();
976  DE = cast<DeclRefExpr>(Base);
977  OrigVD = cast<VarDecl>(DE->getDecl());
978  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
979  const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
980  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981  Base = TempASE->getBase()->IgnoreParenImpCasts();
982  DE = cast<DeclRefExpr>(Base);
983  OrigVD = cast<VarDecl>(DE->getDecl());
984  }
985  return OrigVD;
986 }
987 
989  Address PrivateAddr) {
990  const DeclRefExpr *DE;
991  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
992  BaseDecls.emplace_back(OrigVD);
993  LValue OriginalBaseLValue = CGF.EmitLValue(DE);
994  LValue BaseLValue =
995  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
996  OriginalBaseLValue);
997  Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
998  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
999  SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1000  SharedAddr.getPointer());
1001  llvm::Value *PrivatePointer =
1003  PrivateAddr.getPointer(), SharedAddr.getType());
1004  llvm::Value *Ptr = CGF.Builder.CreateGEP(
1005  SharedAddr.getElementType(), PrivatePointer, Adjustment);
1006  return castToBase(CGF, OrigVD->getType(),
1007  SharedAddresses[N].first.getType(),
1008  OriginalBaseLValue.getAddress(CGF), Ptr);
1009  }
1010  BaseDecls.emplace_back(
1011  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1012  return PrivateAddr;
1013 }
1014 
1016  const OMPDeclareReductionDecl *DRD =
1017  getReductionInit(ClausesData[N].ReductionOp);
1018  return DRD && DRD->getInitializer();
1019 }
1020 
1021 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1022  return CGF.EmitLoadOfPointerLValue(
1023  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1024  getThreadIDVariable()->getType()->castAs<PointerType>());
1025 }
1026 
/// Emit the body of an OpenMP region as a structured block. A terminate
/// scope is pushed around the region code generation so that an exception
/// escaping the block aborts instead of unwinding out of the region.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  // Nothing to emit if there is no insertion point (e.g. the region turned
  // out to be unreachable).
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  // Count an execution of the region body for PGO when a statement is given.
  if (S)
    CGF.incrementProfileCounter(S);
  // Emit the actual region body through the stored code-gen callback.
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1041 
1042 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043  CodeGenFunction &CGF) {
1044  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045  getThreadIDVariable()->getType(),
1047 }
1048 
1050  QualType FieldTy) {
1051  auto *Field = FieldDecl::Create(
1052  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053  C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055  Field->setAccess(AS_public);
1056  DC->addDecl(Field);
1057  return Field;
1058 }
1059 
1061  : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() {
1062  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1063  llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false,
1065  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1066  OMPBuilder.initialize();
1067  OMPBuilder.setConfig(Config);
1068  OffloadEntriesInfoManager.setConfig(Config);
1070 }
1071 
1073  InternalVars.clear();
1074  // Clean non-target variable declarations possibly used only in debug info.
1075  for (const auto &Data : EmittedNonTargetVariables) {
1076  if (!Data.getValue().pointsToAliveValue())
1077  continue;
1078  auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1079  if (!GV)
1080  continue;
1081  if (!GV->isDeclaration() || GV->getNumUses() > 0)
1082  continue;
1083  GV->eraseFromParent();
1084  }
1085 }
1086 
1088  return OMPBuilder.createPlatformSpecificName(Parts);
1089 }
1090 
1091 static llvm::Function *
1093  const Expr *CombinerInitializer, const VarDecl *In,
1094  const VarDecl *Out, bool IsCombiner) {
1095  // void .omp_combiner.(Ty *in, Ty *out);
1096  ASTContext &C = CGM.getContext();
1097  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1098  FunctionArgList Args;
1099  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1100  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1101  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1102  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1103  Args.push_back(&OmpOutParm);
1104  Args.push_back(&OmpInParm);
1105  const CGFunctionInfo &FnInfo =
1106  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1107  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1108  std::string Name = CGM.getOpenMPRuntime().getName(
1109  {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1110  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1111  Name, &CGM.getModule());
1112  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1113  if (CGM.getLangOpts().Optimize) {
1114  Fn->removeFnAttr(llvm::Attribute::NoInline);
1115  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1116  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1117  }
1118  CodeGenFunction CGF(CGM);
1119  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1120  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1121  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1122  Out->getLocation());
1124  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1125  Scope.addPrivate(
1126  In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1127  .getAddress(CGF));
1128  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1129  Scope.addPrivate(
1130  Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1131  .getAddress(CGF));
1132  (void)Scope.Privatize();
1133  if (!IsCombiner && Out->hasInit() &&
1134  !CGF.isTrivialInitializer(Out->getInit())) {
1135  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1136  Out->getType().getQualifiers(),
1137  /*IsInitializer=*/true);
1138  }
1139  if (CombinerInitializer)
1140  CGF.EmitIgnoredExpr(CombinerInitializer);
1141  Scope.ForceCleanup();
1142  CGF.FinishFunction();
1143  return Fn;
1144 }
1145 
1147  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1148  if (UDRMap.count(D) > 0)
1149  return;
1150  llvm::Function *Combiner = emitCombinerOrInitializer(
1151  CGM, D->getType(), D->getCombiner(),
1152  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1153  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1154  /*IsCombiner=*/true);
1155  llvm::Function *Initializer = nullptr;
1156  if (const Expr *Init = D->getInitializer()) {
1158  CGM, D->getType(),
1160  : nullptr,
1161  cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1162  cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1163  /*IsCombiner=*/false);
1164  }
1165  UDRMap.try_emplace(D, Combiner, Initializer);
1166  if (CGF) {
1167  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1168  Decls.second.push_back(D);
1169  }
1170 }
1171 
1172 std::pair<llvm::Function *, llvm::Function *>
1174  auto I = UDRMap.find(D);
1175  if (I != UDRMap.end())
1176  return I->second;
1177  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1178  return UDRMap.lookup(D);
1179 }
1180 
1181 namespace {
1182 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1183 // Builder if one is present.
1184 struct PushAndPopStackRAII {
1185  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1186  bool HasCancel, llvm::omp::Directive Kind)
1187  : OMPBuilder(OMPBuilder) {
1188  if (!OMPBuilder)
1189  return;
1190 
1191  // The following callback is the crucial part of clangs cleanup process.
1192  //
1193  // NOTE:
1194  // Once the OpenMPIRBuilder is used to create parallel regions (and
1195  // similar), the cancellation destination (Dest below) is determined via
1196  // IP. That means if we have variables to finalize we split the block at IP,
1197  // use the new block (=BB) as destination to build a JumpDest (via
1198  // getJumpDestInCurrentScope(BB)) which then is fed to
1199  // EmitBranchThroughCleanup. Furthermore, there will not be the need
1200  // to push & pop an FinalizationInfo object.
1201  // The FiniCB will still be needed but at the point where the
1202  // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1203  auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1204  assert(IP.getBlock()->end() == IP.getPoint() &&
1205  "Clang CG should cause non-terminated block!");
1206  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1207  CGF.Builder.restoreIP(IP);
1209  CGF.getOMPCancelDestination(OMPD_parallel);
1210  CGF.EmitBranchThroughCleanup(Dest);
1211  };
1212 
1213  // TODO: Remove this once we emit parallel regions through the
1214  // OpenMPIRBuilder as it can do this setup internally.
1215  llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1216  OMPBuilder->pushFinalizationCB(std::move(FI));
1217  }
1218  ~PushAndPopStackRAII() {
1219  if (OMPBuilder)
1220  OMPBuilder->popFinalizationCB();
1221  }
1222  llvm::OpenMPIRBuilder *OMPBuilder;
1223 };
1224 } // namespace
1225 
1227  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1228  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1229  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1230  assert(ThreadIDVar->getType()->isPointerType() &&
1231  "thread id variable must be of type kmp_int32 *");
1232  CodeGenFunction CGF(CGM, true);
1233  bool HasCancel = false;
1234  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1235  HasCancel = OPD->hasCancel();
1236  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1237  HasCancel = OPD->hasCancel();
1238  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1239  HasCancel = OPSD->hasCancel();
1240  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1241  HasCancel = OPFD->hasCancel();
1242  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1243  HasCancel = OPFD->hasCancel();
1244  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1245  HasCancel = OPFD->hasCancel();
1246  else if (const auto *OPFD =
1247  dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1248  HasCancel = OPFD->hasCancel();
1249  else if (const auto *OPFD =
1250  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1251  HasCancel = OPFD->hasCancel();
1252 
1253  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1254  // parallel region to make cancellation barriers work properly.
1255  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1256  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1257  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1258  HasCancel, OutlinedHelperName);
1259  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1260  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1261 }
1262 
1264  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1265  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1266  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1268  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1269 }
1270 
1272  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1273  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1274  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1276  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1277 }
1278 
1280  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1281  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1282  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1283  bool Tied, unsigned &NumberOfParts) {
1284  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1285  PrePostActionTy &) {
1286  llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1287  llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1288  llvm::Value *TaskArgs[] = {
1289  UpLoc, ThreadID,
1290  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1291  TaskTVar->getType()->castAs<PointerType>())
1292  .getPointer(CGF)};
1293  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1294  CGM.getModule(), OMPRTL___kmpc_omp_task),
1295  TaskArgs);
1296  };
1297  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1298  UntiedCodeGen);
1299  CodeGen.setAction(Action);
1300  assert(!ThreadIDVar->getType()->isPointerType() &&
1301  "thread id variable must be of type kmp_int32 for tasks");
1302  const OpenMPDirectiveKind Region =
1303  isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1304  : OMPD_task;
1305  const CapturedStmt *CS = D.getCapturedStmt(Region);
1306  bool HasCancel = false;
1307  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1308  HasCancel = TD->hasCancel();
1309  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1310  HasCancel = TD->hasCancel();
1311  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1312  HasCancel = TD->hasCancel();
1313  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1314  HasCancel = TD->hasCancel();
1315 
1316  CodeGenFunction CGF(CGM, true);
1317  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1318  InnermostKind, HasCancel, Action);
1319  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1320  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1321  if (!Tied)
1322  NumberOfParts = Action.getNumberOfParts();
1323  return Res;
1324 }
1325 
1327  bool AtCurrentPoint) {
1328  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1329  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1330 
1331  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1332  if (AtCurrentPoint) {
1333  Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1334  Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1335  } else {
1336  Elem.second.ServiceInsertPt =
1337  new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1338  Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1339  }
1340 }
1341 
1343  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1344  if (Elem.second.ServiceInsertPt) {
1345  llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1346  Elem.second.ServiceInsertPt = nullptr;
1347  Ptr->eraseFromParent();
1348  }
1349 }
1350 
1352  SourceLocation Loc,
1353  SmallString<128> &Buffer) {
1354  llvm::raw_svector_ostream OS(Buffer);
1355  // Build debug location
1357  OS << ";" << PLoc.getFilename() << ";";
1358  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1359  OS << FD->getQualifiedNameAsString();
1360  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1361  return OS.str();
1362 }
1363 
1365  SourceLocation Loc,
1366  unsigned Flags) {
1367  uint32_t SrcLocStrSize;
1368  llvm::Constant *SrcLocStr;
1369  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1370  Loc.isInvalid()) {
1371  SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1372  } else {
1373  std::string FunctionName;
1374  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1375  FunctionName = FD->getQualifiedNameAsString();
1377  const char *FileName = PLoc.getFilename();
1378  unsigned Line = PLoc.getLine();
1379  unsigned Column = PLoc.getColumn();
1380  SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1381  Column, SrcLocStrSize);
1382  }
1383  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1384  return OMPBuilder.getOrCreateIdent(
1385  SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1386 }
1387 
1389  SourceLocation Loc) {
1390  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1391  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1392  // the clang invariants used below might be broken.
1393  if (CGM.getLangOpts().OpenMPIRBuilder) {
1394  SmallString<128> Buffer;
1395  OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1396  uint32_t SrcLocStrSize;
1397  auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1398  getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1399  return OMPBuilder.getOrCreateThreadID(
1400  OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1401  }
1402 
1403  llvm::Value *ThreadID = nullptr;
1404  // Check whether we've already cached a load of the thread id in this
1405  // function.
1406  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1407  if (I != OpenMPLocThreadIDMap.end()) {
1408  ThreadID = I->second.ThreadID;
1409  if (ThreadID != nullptr)
1410  return ThreadID;
1411  }
1412  // If exceptions are enabled, do not use parameter to avoid possible crash.
1413  if (auto *OMPRegionInfo =
1414  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1415  if (OMPRegionInfo->getThreadIDVariable()) {
1416  // Check if this an outlined function with thread id passed as argument.
1417  LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1418  llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1419  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1420  !CGF.getLangOpts().CXXExceptions ||
1421  CGF.Builder.GetInsertBlock() == TopBlock ||
1422  !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1423  cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1424  TopBlock ||
1425  cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1426  CGF.Builder.GetInsertBlock()) {
1427  ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1428  // If value loaded in entry block, cache it and use it everywhere in
1429  // function.
1430  if (CGF.Builder.GetInsertBlock() == TopBlock) {
1431  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1432  Elem.second.ThreadID = ThreadID;
1433  }
1434  return ThreadID;
1435  }
1436  }
1437  }
1438 
1439  // This is not an outlined function region - need to call __kmpc_int32
1440  // kmpc_global_thread_num(ident_t *loc).
1441  // Generate thread id value and cache this value for use across the
1442  // function.
1443  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1444  if (!Elem.second.ServiceInsertPt)
1446  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1447  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1448  llvm::CallInst *Call = CGF.Builder.CreateCall(
1449  OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1450  OMPRTL___kmpc_global_thread_num),
1451  emitUpdateLocation(CGF, Loc));
1452  Call->setCallingConv(CGF.getRuntimeCC());
1453  Elem.second.ThreadID = Call;
1454  return Call;
1455 }
1456 
1458  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1459  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1461  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1462  }
1463  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1464  for(const auto *D : FunctionUDRMap[CGF.CurFn])
1465  UDRMap.erase(D);
1466  FunctionUDRMap.erase(CGF.CurFn);
1467  }
1468  auto I = FunctionUDMMap.find(CGF.CurFn);
1469  if (I != FunctionUDMMap.end()) {
1470  for(const auto *D : I->second)
1471  UDMMap.erase(D);
1472  FunctionUDMMap.erase(I);
1473  }
1476 }
1477 
1479  return OMPBuilder.IdentPtr;
1480 }
1481 
1483  if (!Kmpc_MicroTy) {
1484  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1485  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1486  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1487  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1488  }
1489  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1490 }
1491 
1492 llvm::FunctionCallee
1493 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1494  bool IsGPUDistribute) {
1495  assert((IVSize == 32 || IVSize == 64) &&
1496  "IV size is not compatible with the omp runtime");
1497  StringRef Name;
1498  if (IsGPUDistribute)
1499  Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1500  : "__kmpc_distribute_static_init_4u")
1501  : (IVSigned ? "__kmpc_distribute_static_init_8"
1502  : "__kmpc_distribute_static_init_8u");
1503  else
1504  Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1505  : "__kmpc_for_static_init_4u")
1506  : (IVSigned ? "__kmpc_for_static_init_8"
1507  : "__kmpc_for_static_init_8u");
1508 
1509  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1510  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1511  llvm::Type *TypeParams[] = {
1512  getIdentTyPointerTy(), // loc
1513  CGM.Int32Ty, // tid
1514  CGM.Int32Ty, // schedtype
1515  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1516  PtrTy, // p_lower
1517  PtrTy, // p_upper
1518  PtrTy, // p_stride
1519  ITy, // incr
1520  ITy // chunk
1521  };
1522  auto *FnTy =
1523  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1524  return CGM.CreateRuntimeFunction(FnTy, Name);
1525 }
1526 
1527 llvm::FunctionCallee
1528 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1529  assert((IVSize == 32 || IVSize == 64) &&
1530  "IV size is not compatible with the omp runtime");
1531  StringRef Name =
1532  IVSize == 32
1533  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1534  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1535  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1536  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1537  CGM.Int32Ty, // tid
1538  CGM.Int32Ty, // schedtype
1539  ITy, // lower
1540  ITy, // upper
1541  ITy, // stride
1542  ITy // chunk
1543  };
1544  auto *FnTy =
1545  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1546  return CGM.CreateRuntimeFunction(FnTy, Name);
1547 }
1548 
1549 llvm::FunctionCallee
1550 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1551  assert((IVSize == 32 || IVSize == 64) &&
1552  "IV size is not compatible with the omp runtime");
1553  StringRef Name =
1554  IVSize == 32
1555  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1556  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1557  llvm::Type *TypeParams[] = {
1558  getIdentTyPointerTy(), // loc
1559  CGM.Int32Ty, // tid
1560  };
1561  auto *FnTy =
1562  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1563  return CGM.CreateRuntimeFunction(FnTy, Name);
1564 }
1565 
1566 llvm::FunctionCallee
1567 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1568  assert((IVSize == 32 || IVSize == 64) &&
1569  "IV size is not compatible with the omp runtime");
1570  StringRef Name =
1571  IVSize == 32
1572  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1573  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1574  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1575  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1576  llvm::Type *TypeParams[] = {
1577  getIdentTyPointerTy(), // loc
1578  CGM.Int32Ty, // tid
1579  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1580  PtrTy, // p_lower
1581  PtrTy, // p_upper
1582  PtrTy // p_stride
1583  };
1584  auto *FnTy =
1585  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1586  return CGM.CreateRuntimeFunction(FnTy, Name);
1587 }
1588 
1589 /// Obtain information that uniquely identifies a target entry. This
1590 /// consists of the file and device IDs as well as line number associated with
1591 /// the relevant entry source location.
1592 static llvm::TargetRegionEntryInfo
1594  StringRef ParentName = "") {
1595  SourceManager &SM = C.getSourceManager();
1596 
1597  // The loc should be always valid and have a file ID (the user cannot use
1598  // #pragma directives in macros)
1599 
1600  assert(Loc.isValid() && "Source location is expected to be always valid.");
1601 
1602  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1603  assert(PLoc.isValid() && "Source location is expected to be always valid.");
1604 
1605  llvm::sys::fs::UniqueID ID;
1606  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1607  PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1608  assert(PLoc.isValid() && "Source location is expected to be always valid.");
1609  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1610  SM.getDiagnostics().Report(diag::err_cannot_open_file)
1611  << PLoc.getFilename() << EC.message();
1612  }
1613 
1614  return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(),
1615  PLoc.getLine());
1616 }
1617 
1619  if (CGM.getLangOpts().OpenMPSimd)
1620  return Address::invalid();
1622  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1623  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1624  ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
1625  *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
1627  SmallString<64> PtrName;
1628  {
1629  llvm::raw_svector_ostream OS(PtrName);
1630  OS << CGM.getMangledName(GlobalDecl(VD));
1631  if (!VD->isExternallyVisible()) {
1632  auto EntryInfo = getTargetEntryUniqueInfo(
1634  OS << llvm::format("_%x", EntryInfo.FileID);
1635  }
1636  OS << "_decl_tgt_ref_ptr";
1637  }
1638  llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1639  QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1640  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
1641  if (!Ptr) {
1642  Ptr = OMPBuilder.getOrCreateInternalVariable(LlvmPtrTy, PtrName);
1643 
1644  auto *GV = cast<llvm::GlobalVariable>(Ptr);
1645  GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1646 
1647  if (!CGM.getLangOpts().OpenMPIsDevice)
1648  GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1649  registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1650  }
1651  return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1652  }
1653  return Address::invalid();
1654 }
1655 
1656 llvm::Constant *
1658  assert(!CGM.getLangOpts().OpenMPUseTLS ||
1660  // Lookup the entry, lazily creating it if necessary.
1661  std::string Suffix = getName({"cache", ""});
1662  return OMPBuilder.getOrCreateInternalVariable(
1663  CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1664 }
1665 
// Returns the address of the current thread's copy of a threadprivate
// variable. Under TLS the original address is already thread-local and is
// returned unchanged; otherwise the copy's address comes from a call to
// __kmpc_threadprivate_cached.
// NOTE(review): the declarator line (doxygen 1666), the second operand of the
// TLS condition (1671), and the size/cache arguments of the runtime call
// (1678-1679) are missing from this extraction.
1667  const VarDecl *VD,
1668  Address VDAddr,
1669  SourceLocation Loc) {
 // TLS path: the variable itself is thread-local; no runtime call needed.
1670  if (CGM.getLangOpts().OpenMPUseTLS &&
1672  return VDAddr;
1673 
1674  llvm::Type *VarTy = VDAddr.getElementType();
1675  llvm::Value *Args[] = {
1676  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1677  CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
 // Result is the i8* returned by the runtime, carrying VDAddr's alignment.
1680  return Address(
1681  CGF.EmitRuntimeCall(
1682  OMPBuilder.getOrCreateRuntimeFunction(
1683  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1684  Args),
1685  CGF.Int8Ty, VDAddr.getAlignment());
1686 }
1687 
// emitThreadPrivateVarInit (called below from emitThreadPrivateVarDefinition;
// the declarator line, doxygen 1688, is missing from this extraction):
// initializes the OpenMP runtime and registers the given
// constructor/copy-constructor/destructor for a threadprivate variable.
1689  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1690  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1691  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1692  // library.
1693  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1694  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1695  CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1696  OMPLoc);
1697  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1698  // to register constructor/destructor for variable.
1699  llvm::Value *Args[] = {
1700  OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1701  Ctor, CopyCtor, Dtor};
1702  CGF.EmitRuntimeCall(
1703  OMPBuilder.getOrCreateRuntimeFunction(
1704  CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1705  Args);
1706 }
1707 
// emitThreadPrivateVarDefinition (declarator line, doxygen 1708, missing from
// this extraction): emits -- once per mangled name -- the
// "__kmpc_global_ctor_*"/"__kmpc_global_dtor_*" helper functions for a
// threadprivate variable and registers them with the runtime, either inside
// the provided CGF or inside a freshly created "__omp_threadprivate_init_*"
// function which is then returned to the caller. Returns nullptr when TLS is
// in use or when no registration is required.
// NOTE(review): lines 1712, 1728 and 1761 (the second TLS-condition operand
// and the trailing ImplicitParamDecl arguments) are absent from this listing;
// verify against upstream before editing.
1709  const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1710  bool PerformInit, CodeGenFunction *CGF) {
1711  if (CGM.getLangOpts().OpenMPUseTLS &&
1713  return nullptr;
1714 
 // Only the defining declaration is registered, and only once (the insert
 // into ThreadPrivateWithDefinition dedupes by mangled name).
1715  VD = VD->getDefinition(CGM.getContext());
1716  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1717  QualType ASTTy = VD->getType();
1718 
1719  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1720  const Expr *Init = VD->getAnyInitializer();
1721  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1722  // Generate function that re-emits the declaration's initializer into the
1723  // threadprivate copy of the variable VD
1724  CodeGenFunction CtorCGF(CGM);
1725  FunctionArgList Args;
1726  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1727  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1729  Args.push_back(&Dst);
1730 
1731  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1732  CGM.getContext().VoidPtrTy, Args);
1733  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1734  std::string Name = getName({"__kmpc_global_ctor_", ""});
1735  llvm::Function *Fn =
1736  CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1737  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1738  Args, Loc, Loc);
 // The ctor receives the destination pointer as its single argument,
 // initializes the storage it points to, and returns the same pointer.
1739  llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1740  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1741  CGM.getContext().VoidPtrTy, Dst.getLocation());
1742  Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
1743  Arg = CtorCGF.Builder.CreateElementBitCast(
1744  Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1745  CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1746  /*IsInitializer=*/true);
1747  ArgVal = CtorCGF.EmitLoadOfScalar(
1748  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1749  CGM.getContext().VoidPtrTy, Dst.getLocation());
1750  CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1751  CtorCGF.FinishFunction();
1752  Ctor = Fn;
1753  }
1754  if (VD->getType().isDestructedType() != QualType::DK_none) {
1755  // Generate function that emits destructor call for the threadprivate copy
1756  // of the variable VD
1757  CodeGenFunction DtorCGF(CGM);
1758  FunctionArgList Args;
1759  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1760  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1762  Args.push_back(&Dst);
1763 
1764  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1765  CGM.getContext().VoidTy, Args);
1766  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1767  std::string Name = getName({"__kmpc_global_dtor_", ""});
1768  llvm::Function *Fn =
1769  CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1770  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1771  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1772  Loc, Loc);
1773  // Create a scope with an artificial location for the body of this function.
1774  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1775  llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1776  DtorCGF.GetAddrOfLocalVar(&Dst),
1777  /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1778  DtorCGF.emitDestroy(
1779  Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1780  DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1781  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1782  DtorCGF.FinishFunction();
1783  Dtor = Fn;
1784  }
1785  // Do not emit init function if it is not required.
1786  if (!Ctor && !Dtor)
1787  return nullptr;
1788 
1789  llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1790  auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1791  /*isVarArg=*/false)
1792  ->getPointerTo();
1793  // Copying constructor for the threadprivate variable.
1794  // Must be NULL - reserved by runtime, but currently it requires that this
1795  // parameter is always NULL. Otherwise it fires assertion.
1796  CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
 // Missing ctor/dtor slots are passed to the runtime as typed null pointers.
1797  if (Ctor == nullptr) {
1798  auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1799  /*isVarArg=*/false)
1800  ->getPointerTo();
1801  Ctor = llvm::Constant::getNullValue(CtorTy);
1802  }
1803  if (Dtor == nullptr) {
1804  auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1805  /*isVarArg=*/false)
1806  ->getPointerTo();
1807  Dtor = llvm::Constant::getNullValue(DtorTy);
1808  }
 // With no CGF the registration runs from a dedicated init function that the
 // caller is expected to schedule (it is returned); otherwise register inline.
1809  if (!CGF) {
1810  auto *InitFunctionTy =
1811  llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1812  std::string Name = getName({"__omp_threadprivate_init_", ""});
1813  llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1814  InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1815  CodeGenFunction InitCGF(CGM);
1816  FunctionArgList ArgList;
1817  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1818  CGM.getTypes().arrangeNullaryFunction(), ArgList,
1819  Loc, Loc);
1820  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1821  InitCGF.FinishFunction();
1822  return InitFunction;
1823  }
1824  emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1825  }
1826  return nullptr;
1827 }
1828 
// emitDeclareTargetVarDefinition (declarator line, doxygen 1829, missing from
// this extraction): for a declare-target variable that needs dynamic
// initialization and/or destruction, emits device-side "<entry>_ctor" /
// "<entry>_dtor" functions (AMDGPU kernels on amdgcn) or host-side
// placeholder globals, and registers both as offload entries. The function
// returns CGM.getLangOpts().OpenMPIsDevice on every successful path, i.e.
// "true" exactly when compiling for the device.
// NOTE(review): lines 1835, 1840, 1849, 1855, 1868 and 1917 (the
// declare-target lookup, parts of the early-exit condition, the Loc/FI
// declarations) are absent from this listing; verify against upstream before
// editing.
1830  llvm::GlobalVariable *Addr,
1831  bool PerformInit) {
 // Nothing to do for a pure host compile with no offload targets.
1832  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1833  !CGM.getLangOpts().OpenMPIsDevice)
1834  return false;
1836  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1837  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1838  ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
1839  *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
1841  return CGM.getLangOpts().OpenMPIsDevice;
1842  VD = VD->getDefinition(CGM.getContext());
1843  assert(VD && "Unknown VarDecl");
1844 
 // Dedupe: emit the ctor/dtor entries only once per mangled name.
1845  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1846  return CGM.getLangOpts().OpenMPIsDevice;
1847 
1848  QualType ASTTy = VD->getType();
1850 
1851  // Produce the unique prefix to identify the new target regions. We use
1852  // the source location of the variable declaration which we know to not
1853  // conflict with any target region.
1854  auto EntryInfo =
1856  SmallString<128> Buffer, Out;
1857  OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
1858 
1859  const Expr *Init = VD->getAnyInitializer();
1860  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1861  llvm::Constant *Ctor;
1862  llvm::Constant *ID;
1863  if (CGM.getLangOpts().OpenMPIsDevice) {
1864  // Generate function that re-emits the declaration's initializer into
1865  // the threadprivate copy of the variable VD
1866  CodeGenFunction CtorCGF(CGM);
1867 
1869  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1870  llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1871  FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1872  llvm::GlobalValue::WeakODRLinkage);
1873  Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1874  if (CGM.getTriple().isAMDGCN())
1875  Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1876  auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1877  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1878  FunctionArgList(), Loc, Loc);
1879  auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
 // The initializer must be emitted through an addrspace(0) view of the
 // global; cast only when the global lives in a non-default address space.
1880  llvm::Constant *AddrInAS0 = Addr;
1881  if (Addr->getAddressSpace() != 0)
1882  AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1883  Addr, llvm::PointerType::getWithSamePointeeType(
1884  cast<llvm::PointerType>(Addr->getType()), 0));
1885  CtorCGF.EmitAnyExprToMem(Init,
1886  Address(AddrInAS0, Addr->getValueType(),
1887  CGM.getContext().getDeclAlign(VD)),
1888  Init->getType().getQualifiers(),
1889  /*IsInitializer=*/true);
1890  CtorCGF.FinishFunction();
1891  Ctor = Fn;
1892  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1893  } else {
 // Host side: only a private placeholder global is needed so the entry
 // table on host and device stay structurally in sync.
1894  Ctor = new llvm::GlobalVariable(
1895  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1896  llvm::GlobalValue::PrivateLinkage,
1897  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1898  ID = Ctor;
1899  }
1900 
1901  // Register the information for the entry associated with the constructor.
1902  Out.clear();
1903  auto CtorEntryInfo = EntryInfo;
1904  CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
1905  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1906  CtorEntryInfo, Ctor, ID,
1907  llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
1908  }
1909  if (VD->getType().isDestructedType() != QualType::DK_none) {
1910  llvm::Constant *Dtor;
1911  llvm::Constant *ID;
1912  if (CGM.getLangOpts().OpenMPIsDevice) {
1913  // Generate function that emits destructor call for the threadprivate
1914  // copy of the variable VD
1915  CodeGenFunction DtorCGF(CGM);
1916 
1918  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1919  llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1920  FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1921  llvm::GlobalValue::WeakODRLinkage);
1922  Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1923  if (CGM.getTriple().isAMDGCN())
1924  Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1925  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1926  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1927  FunctionArgList(), Loc, Loc);
1928  // Create a scope with an artificial location for the body of this
1929  // function.
1930  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1931  llvm::Constant *AddrInAS0 = Addr;
1932  if (Addr->getAddressSpace() != 0)
1933  AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1934  Addr, llvm::PointerType::getWithSamePointeeType(
1935  cast<llvm::PointerType>(Addr->getType()), 0));
1936  DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1937  CGM.getContext().getDeclAlign(VD)),
1938  ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1939  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1940  DtorCGF.FinishFunction();
1941  Dtor = Fn;
1942  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1943  } else {
1944  Dtor = new llvm::GlobalVariable(
1945  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1946  llvm::GlobalValue::PrivateLinkage,
1947  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1948  ID = Dtor;
1949  }
1950  // Register the information for the entry associated with the destructor.
1951  Out.clear();
1952  auto DtorEntryInfo = EntryInfo;
1953  DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
1954  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1955  DtorEntryInfo, Dtor, ID,
1956  llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
1957  }
1958  return CGM.getLangOpts().OpenMPIsDevice;
1959 }
1960 
// getAddrOfArtificialThreadPrivate (declarator line, doxygen 1961, missing
// from this extraction): returns the address of a compiler-generated
// ("artificial") threadprivate value of the given type and name. When TLS is
// usable the backing internal global is simply marked thread_local;
// otherwise the per-thread copy is fetched via __kmpc_threadprivate_cached
// with a dedicated "<name>...cache" internal global.
// NOTE(review): lines 1969, 1976, 1978 and 1985 (part of the TLS condition,
// the first runtime-call argument, the address cast) are missing here.
1962  QualType VarType,
1963  StringRef Name) {
1964  std::string Suffix = getName({"artificial", ""});
1965  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1966  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1967  VarLVType, Twine(Name).concat(Suffix).str());
1968  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1970  GAddr->setThreadLocal(/*Val=*/true);
1971  return Address(GAddr, GAddr->getValueType(),
1972  CGM.getContext().getTypeAlignInChars(VarType));
1973  }
1974  std::string CacheSuffix = getName({"cache", ""});
1975  llvm::Value *Args[] = {
1977  getThreadID(CGF, SourceLocation()),
1979  CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1980  /*isSigned=*/false),
1981  OMPBuilder.getOrCreateInternalVariable(
1982  CGM.VoidPtrPtrTy,
1983  Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1984  return Address(
1986  CGF.EmitRuntimeCall(
1987  OMPBuilder.getOrCreateRuntimeFunction(
1988  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1989  Args),
1990  VarLVType->getPointerTo(/*AddrSpace=*/0)),
1991  VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1992 }
1993 
// emitIfClause (declarator line, doxygen 1994, missing from this extraction;
// called from emitParallelCall below): emits ThenGen or ElseGen guarded by an
// OpenMP 'if'-clause condition. If the condition constant-folds, only the
// live arm is emitted; otherwise a normal conditional branch is produced.
// NOTE(review): lines 2023 and 2027 (debug-location suppression around the
// else branch, presumably) are missing from this listing.
1995  const RegionCodeGenTy &ThenGen,
1996  const RegionCodeGenTy &ElseGen) {
1997  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1998 
1999  // If the condition constant folds and can be elided, try to avoid emitting
2000  // the condition and the dead arm of the if/else.
2001  bool CondConstant;
2002  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2003  if (CondConstant)
2004  ThenGen(CGF);
2005  else
2006  ElseGen(CGF);
2007  return;
2008  }
2009 
2010  // Otherwise, the condition did not fold, or we couldn't elide it. Just
2011  // emit the conditional branch.
2012  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2013  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2014  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2015  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2016 
2017  // Emit the 'then' code.
2018  CGF.EmitBlock(ThenBlock);
2019  ThenGen(CGF);
2020  CGF.EmitBranch(ContBlock);
2021  // Emit the 'else' code if present.
2022  // There is no need to emit line number for unconditional branch.
2024  CGF.EmitBlock(ElseBlock);
2025  ElseGen(CGF);
2026  // There is no need to emit line number for unconditional branch.
2028  CGF.EmitBranch(ContBlock);
2029  // Emit the continuation block for code after the if.
2030  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2031 }
2032 
// emitParallelCall (declarator line, doxygen 2033, missing from this
// extraction): emits a '#pragma omp parallel' region. The "then" path calls
// __kmpc_fork_call(loc, nargs, microtask, captured...); the "else" path (an
// 'if' clause that evaluated false) runs the outlined function on the
// current thread between __kmpc_serialized_parallel /
// __kmpc_end_serialized_parallel.
// NOTE(review): line 2050 (the declaration of RealArgs) is missing from this
// listing, and the NumThreads parameter is unused in the visible code --
// confirm both against upstream.
2034  llvm::Function *OutlinedFn,
2035  ArrayRef<llvm::Value *> CapturedVars,
2036  const Expr *IfCond,
2037  llvm::Value *NumThreads) {
2038  if (!CGF.HaveInsertPoint())
2039  return;
2040  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2041  auto &M = CGM.getModule();
2042  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2043  this](CodeGenFunction &CGF, PrePostActionTy &) {
2044  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2045  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2046  llvm::Value *Args[] = {
2047  RTLoc,
2048  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2049  CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2051  RealArgs.append(std::begin(Args), std::end(Args));
2052  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2053 
2054  llvm::FunctionCallee RTLFn =
2055  OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2056  CGF.EmitRuntimeCall(RTLFn, RealArgs);
2057  };
2058  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2059  this](CodeGenFunction &CGF, PrePostActionTy &) {
2060  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2061  llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2062  // Build calls:
2063  // __kmpc_serialized_parallel(&Loc, GTid);
2064  llvm::Value *Args[] = {RTLoc, ThreadID};
2065  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2066  M, OMPRTL___kmpc_serialized_parallel),
2067  Args);
2068 
2069  // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2070  Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2071  Address ZeroAddrBound =
2072  CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2073  /*Name=*/".bound.zero.addr");
2074  CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2075  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2076  // ThreadId for serialized parallels is 0.
2077  OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2078  OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2079  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2080 
2081  // Ensure we do not inline the function. This is trivially true for the ones
2082  // passed to __kmpc_fork_call but the ones called in serialized regions
2083  // could be inlined. This is not a perfect but it is closer to the invariant
2084  // we want, namely, every data environment starts with a new function.
2085  // TODO: We should pass the if condition to the runtime function and do the
2086  // handling there. Much cleaner code.
2087  OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2088  OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2089  RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2090 
2091  // __kmpc_end_serialized_parallel(&Loc, GTid);
2092  llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2093  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2094  M, OMPRTL___kmpc_end_serialized_parallel),
2095  EndArgs);
2096  };
 // With no 'if' clause the parallel path is emitted unconditionally.
2097  if (IfCond) {
2098  emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2099  } else {
2100  RegionCodeGenTy ThenRCG(ThenGen);
2101  ThenRCG(CGF);
2102  }
2103 }
2104 
2105 // If we're inside an (outlined) parallel region, use the region info's
2106 // thread-ID variable (it is passed in a first argument of the outlined function
2107 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2108 // regular serial code region, get thread ID by calling kmp_int32
2109 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2110 // return the address of that temp.
// NOTE(review): the declarator line (doxygen 2111, presumably
// CGOpenMPRuntime::emitThreadIDAddress) is missing from this extraction.
2112  SourceLocation Loc) {
2113  if (auto *OMPRegionInfo =
2114  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2115  if (OMPRegionInfo->getThreadIDVariable())
2116  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2117 
 // Serial region: materialize the thread ID into a temp and return its addr.
2118  llvm::Value *ThreadID = getThreadID(CGF, Loc);
2119  QualType Int32Ty =
2120  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2121  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2122  CGF.EmitStoreOfScalar(ThreadID,
2123  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2124 
2125  return ThreadIDTemp;
2126 }
2127 
2128 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2129  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2130  std::string Name = getName({Prefix, "var"});
2131  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2132 }
2133 
2134 namespace {
2135 /// Common pre(post)-action for different OpenMP constructs.
2136 class CommonActionTy final : public PrePostActionTy {
2137  llvm::FunctionCallee EnterCallee;
2138  ArrayRef<llvm::Value *> EnterArgs;
2139  llvm::FunctionCallee ExitCallee;
2140  ArrayRef<llvm::Value *> ExitArgs;
2141  bool Conditional;
2142  llvm::BasicBlock *ContBlock = nullptr;
2143 
2144 public:
2145  CommonActionTy(llvm::FunctionCallee EnterCallee,
2146  ArrayRef<llvm::Value *> EnterArgs,
2147  llvm::FunctionCallee ExitCallee,
2148  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2149  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2150  ExitArgs(ExitArgs), Conditional(Conditional) {}
2151  void Enter(CodeGenFunction &CGF) override {
2152  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2153  if (Conditional) {
2154  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2155  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2156  ContBlock = CGF.createBasicBlock("omp_if.end");
2157  // Generate the branch (If-stmt)
2158  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2159  CGF.EmitBlock(ThenBlock);
2160  }
2161  }
2162  void Done(CodeGenFunction &CGF) {
2163  // Emit the rest of blocks/branches
2164  CGF.EmitBranch(ContBlock);
2165  CGF.EmitBlock(ContBlock, true);
2166  }
2167  void Exit(CodeGenFunction &CGF) override {
2168  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2169  }
2170 };
2171 } // anonymous namespace
2172 
// emitCriticalRegion (declarator line, doxygen 2173, missing from this
// extraction): wraps CriticalOpGen between __kmpc_critical[_with_hint] and
// __kmpc_end_critical on the per-name lock from getCriticalRegionLock. A
// 'hint' clause selects the _with_hint entry point and appends the hint
// value (as unsigned i32) to the enter arguments only.
2174  StringRef CriticalName,
2175  const RegionCodeGenTy &CriticalOpGen,
2176  SourceLocation Loc, const Expr *Hint) {
2177  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2178  // CriticalOpGen();
2179  // __kmpc_end_critical(ident_t *, gtid, Lock);
2180  // Prepare arguments and build a call to __kmpc_critical
2181  if (!CGF.HaveInsertPoint())
2182  return;
2183  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2184  getCriticalRegionLock(CriticalName)};
2185  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2186  std::end(Args));
2187  if (Hint) {
2188  EnterArgs.push_back(CGF.Builder.CreateIntCast(
2189  CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2190  }
2191  CommonActionTy Action(
2192  OMPBuilder.getOrCreateRuntimeFunction(
2193  CGM.getModule(),
2194  Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2195  EnterArgs,
2196  OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2197  OMPRTL___kmpc_end_critical),
2198  Args);
2199  CriticalOpGen.setAction(Action);
2200  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2201 }
2202 
// emitMasterRegion (declarator line, doxygen 2203, missing from this
// extraction): emits MasterOpGen under a conditional
// __kmpc_master/__kmpc_end_master pair -- only the thread for which
// __kmpc_master returns non-zero executes the region. Action.Done() closes
// the conditional branch opened by CommonActionTy::Enter.
2204  const RegionCodeGenTy &MasterOpGen,
2205  SourceLocation Loc) {
2206  if (!CGF.HaveInsertPoint())
2207  return;
2208  // if(__kmpc_master(ident_t *, gtid)) {
2209  // MasterOpGen();
2210  // __kmpc_end_master(ident_t *, gtid);
2211  // }
2212  // Prepare arguments and build a call to __kmpc_master
2213  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2214  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2215  CGM.getModule(), OMPRTL___kmpc_master),
2216  Args,
2217  OMPBuilder.getOrCreateRuntimeFunction(
2218  CGM.getModule(), OMPRTL___kmpc_end_master),
2219  Args,
2220  /*Conditional=*/true);
2221  MasterOpGen.setAction(Action);
2222  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2223  Action.Done(CGF);
2224 }
2225 
// emitMaskedRegion (declarator line, doxygen 2226, missing from this
// extraction): like emitMasterRegion but for '#pragma omp masked'; the
// filter expression (defaulting to 0, the primary thread) is passed to
// __kmpc_masked while __kmpc_end_masked takes only loc/gtid.
2227  const RegionCodeGenTy &MaskedOpGen,
2228  SourceLocation Loc, const Expr *Filter) {
2229  if (!CGF.HaveInsertPoint())
2230  return;
2231  // if(__kmpc_masked(ident_t *, gtid, filter)) {
2232  // MaskedOpGen();
2233  // __kmpc_end_masked(iden_t *, gtid);
2234  // }
2235  // Prepare arguments and build a call to __kmpc_masked
2236  llvm::Value *FilterVal = Filter
2237  ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2238  : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2239  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2240  FilterVal};
2241  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2242  getThreadID(CGF, Loc)};
2243  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2244  CGM.getModule(), OMPRTL___kmpc_masked),
2245  Args,
2246  OMPBuilder.getOrCreateRuntimeFunction(
2247  CGM.getModule(), OMPRTL___kmpc_end_masked),
2248  ArgsEnd,
2249  /*Conditional=*/true);
2250  MaskedOpGen.setAction(Action);
2251  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2252  Action.Done(CGF);
2253 }
2254 
// emitTaskyieldCall (declarator line, doxygen 2255, missing from this
// extraction): emits a taskyield point, via the OpenMPIRBuilder when it is
// enabled, otherwise as a direct __kmpc_omp_taskyield(loc, gtid, 0) call.
// In untied task regions it also emits the untied-task resume switch.
2256  SourceLocation Loc) {
2257  if (!CGF.HaveInsertPoint())
2258  return;
2259  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2260  OMPBuilder.createTaskyield(CGF.Builder);
2261  } else {
2262  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2263  llvm::Value *Args[] = {
2264  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2265  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2266  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2267  CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2268  Args);
2269  }
2270 
2271  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2272  Region->emitUntiedSwitch(CGF);
2273 }
2274 
// emitTaskgroupRegion (declarator line, doxygen 2275, missing from this
// extraction): wraps TaskgroupOpGen between unconditional
// __kmpc_taskgroup/__kmpc_end_taskgroup calls.
2276  const RegionCodeGenTy &TaskgroupOpGen,
2277  SourceLocation Loc) {
2278  if (!CGF.HaveInsertPoint())
2279  return;
2280  // __kmpc_taskgroup(ident_t *, gtid);
2281  // TaskgroupOpGen();
2282  // __kmpc_end_taskgroup(ident_t *, gtid);
2283  // Prepare arguments and build a call to __kmpc_taskgroup
2284  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2285  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2286  CGM.getModule(), OMPRTL___kmpc_taskgroup),
2287  Args,
2288  OMPBuilder.getOrCreateRuntimeFunction(
2289  CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2290  Args);
2291  TaskgroupOpGen.setAction(Action);
2292  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2293 }
2294 
2295 /// Given an array of pointers to variables, project the address of a
2296 /// given variable.
// NOTE(review): the first line of the signature (doxygen 2297, presumably
// "static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address
// Array,") is missing from this extraction. The returned address is the
// loaded element pointer, bitcast to Var's memory type, with Var's natural
// alignment.
2298  unsigned Index, const VarDecl *Var) {
2299  // Pull out the pointer to the variable.
2300  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2301  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2302 
2303  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2304  return Address(
2305  CGF.Builder.CreateBitCast(
2306  Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2307  ElemTy, CGF.getContext().getDeclAlign(Var));
2308 }
2309 
2310 static llvm::Value *emitCopyprivateCopyFunction(
2311  CodeGenModule &CGM, llvm::Type *ArgsElemType,
2312  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2313  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2314  SourceLocation Loc) {
 // Builds the helper passed to __kmpc_copyprivate: a function taking two
 // void* arrays (destination and source pointer lists) that performs the
 // per-variable assignments given in AssignmentOps.
 // NOTE(review): lines 2319, 2321 (trailing ImplicitParamDecl arguments),
 // 2329-2330 (linkage/name arguments of Function::Create) and 2337, 2341
 // (the LHS/RHS Address constructions) are missing from this extraction.
2315  ASTContext &C = CGM.getContext();
2316  // void copy_func(void *LHSArg, void *RHSArg);
2317  FunctionArgList Args;
2318  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2320  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2322  Args.push_back(&LHSArg);
2323  Args.push_back(&RHSArg);
2324  const auto &CGFI =
2325  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2326  std::string Name =
2327  CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2328  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2330  &CGM.getModule());
2331  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2332  Fn->setDoesNotRecurse();
2333  CodeGenFunction CGF(CGM);
2334  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2335  // Dest = (void*[n])(LHSArg);
2336  // Src = (void*[n])(RHSArg);
2338  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2339  ArgsElemType->getPointerTo()),
2340  ArgsElemType, CGF.getPointerAlign());
2342  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2343  ArgsElemType->getPointerTo()),
2344  ArgsElemType, CGF.getPointerAlign());
2345  // *(Type0*)Dst[0] = *(Type0*)Src[0];
2346  // *(Type1*)Dst[1] = *(Type1*)Src[1];
2347  // ...
2348  // *(Typen*)Dst[n] = *(Typen*)Src[n];
2349  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2350  const auto *DestVar =
2351  cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2352  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2353 
2354  const auto *SrcVar =
2355  cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2356  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2357 
2358  const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2359  QualType Type = VD->getType();
2360  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2361  }
2362  CGF.FinishFunction();
2363  return Fn;
2364 }
2365 
// emitSingleRegion (declarator line, doxygen 2366, missing from this
// extraction): emits a '#pragma omp single' region under a conditional
// __kmpc_single/__kmpc_end_single pair. When 'copyprivate' variables exist,
// a "did_it" flag records whether this thread executed the region, a void*
// array of the variables' addresses is built, and __kmpc_copyprivate
// broadcasts the executing thread's values to the others via the helper
// from emitCopyprivateCopyFunction.
// NOTE(review): lines 2425 (the pointer cast feeding each array store) and
// 2436 (the declaration of CL) are missing from this listing.
2367  const RegionCodeGenTy &SingleOpGen,
2368  SourceLocation Loc,
2369  ArrayRef<const Expr *> CopyprivateVars,
2370  ArrayRef<const Expr *> SrcExprs,
2371  ArrayRef<const Expr *> DstExprs,
2372  ArrayRef<const Expr *> AssignmentOps) {
2373  if (!CGF.HaveInsertPoint())
2374  return;
2375  assert(CopyprivateVars.size() == SrcExprs.size() &&
2376  CopyprivateVars.size() == DstExprs.size() &&
2377  CopyprivateVars.size() == AssignmentOps.size());
2378  ASTContext &C = CGM.getContext();
2379  // int32 did_it = 0;
2380  // if(__kmpc_single(ident_t *, gtid)) {
2381  // SingleOpGen();
2382  // __kmpc_end_single(ident_t *, gtid);
2383  // did_it = 1;
2384  // }
2385  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2386  // <copy_func>, did_it);
2387 
2388  Address DidIt = Address::invalid();
2389  if (!CopyprivateVars.empty()) {
2390  // int32 did_it = 0;
2391  QualType KmpInt32Ty =
2392  C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2393  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2394  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2395  }
2396  // Prepare arguments and build a call to __kmpc_single
2397  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2398  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2399  CGM.getModule(), OMPRTL___kmpc_single),
2400  Args,
2401  OMPBuilder.getOrCreateRuntimeFunction(
2402  CGM.getModule(), OMPRTL___kmpc_end_single),
2403  Args,
2404  /*Conditional=*/true);
2405  SingleOpGen.setAction(Action);
2406  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2407  if (DidIt.isValid()) {
2408  // did_it = 1;
2409  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2410  }
2411  Action.Done(CGF);
2412  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2413  // <copy_func>, did_it);
2414  if (DidIt.isValid()) {
2415  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2416  QualType CopyprivateArrayTy = C.getConstantArrayType(
2417  C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2418  /*IndexTypeQuals=*/0);
2419  // Create a list of all private variables for copyprivate.
2420  Address CopyprivateList =
2421  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2422  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2423  Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2424  CGF.Builder.CreateStore(
2426  CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2427  CGF.VoidPtrTy),
2428  Elem);
2429  }
2430  // Build function that copies private values from single region to all other
2431  // threads in the corresponding parallel region.
2432  llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2433  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2434  SrcExprs, DstExprs, AssignmentOps, Loc);
2435  llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2437  CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2438  llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2439  llvm::Value *Args[] = {
2440  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2441  getThreadID(CGF, Loc), // i32 <gtid>
2442  BufSize, // size_t <buf_size>
2443  CL.getPointer(), // void *<copyprivate list>
2444  CpyFn, // void (*) (void *, void *) <copy_func>
2445  DidItVal // i32 did_it
2446  };
2447  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2448  CGM.getModule(), OMPRTL___kmpc_copyprivate),
2449  Args);
2450  }
2451 }
2452 
// emitOrderedRegion (declarator line, doxygen 2453, missing from this
// extraction): emits an 'ordered' region. With 'threads' (IsThreads) the
// region is bracketed by __kmpc_ordered/__kmpc_end_ordered; otherwise (e.g.
// 'simd') the body is emitted inline with no runtime calls.
2454  const RegionCodeGenTy &OrderedOpGen,
2455  SourceLocation Loc, bool IsThreads) {
2456  if (!CGF.HaveInsertPoint())
2457  return;
2458  // __kmpc_ordered(ident_t *, gtid);
2459  // OrderedOpGen();
2460  // __kmpc_end_ordered(ident_t *, gtid);
2461  // Prepare arguments and build a call to __kmpc_ordered
2462  if (IsThreads) {
2463  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2464  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2465  CGM.getModule(), OMPRTL___kmpc_ordered),
2466  Args,
2467  OMPBuilder.getOrCreateRuntimeFunction(
2468  CGM.getModule(), OMPRTL___kmpc_end_ordered),
2469  Args);
2470  OrderedOpGen.setAction(Action);
2471  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2472  return;
2473  }
2474  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2475 }
2476 
2478  unsigned Flags;
2479  if (Kind == OMPD_for)
2480  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2481  else if (Kind == OMPD_sections)
2482  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2483  else if (Kind == OMPD_single)
2484  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2485  else if (Kind == OMPD_barrier)
2486  Flags = OMP_IDENT_BARRIER_EXPL;
2487  else
2488  Flags = OMP_IDENT_BARRIER_IMPL;
2489  return Flags;
2490 }
2491 
2493  CodeGenFunction &CGF, const OMPLoopDirective &S,
2494  OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2495  // Check if the loop directive is actually a doacross loop directive. In this
2496  // case choose static, 1 schedule.
2497  if (llvm::any_of(
2498  S.getClausesOfKind<OMPOrderedClause>(),
2499  [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2500  ScheduleKind = OMPC_SCHEDULE_static;
2501  // Chunk size is 1 in this case.
2502  llvm::APInt ChunkSize(32, 1);
2503  ChunkExpr = IntegerLiteral::Create(
2504  CGF.getContext(), ChunkSize,
2505  CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2506  SourceLocation());
2507  }
2508 }
2509 
2511  OpenMPDirectiveKind Kind, bool EmitChecks,
2512  bool ForceSimpleCall) {
2513  // Check if we should use the OMPBuilder
2514  auto *OMPRegionInfo =
2515  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2516  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2517  CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2518  CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2519  return;
2520  }
2521 
2522  if (!CGF.HaveInsertPoint())
2523  return;
2524  // Build call __kmpc_cancel_barrier(loc, thread_id);
2525  // Build call __kmpc_barrier(loc, thread_id);
2526  unsigned Flags = getDefaultFlagsForBarriers(Kind);
2527  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2528  // thread_id);
2529  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2530  getThreadID(CGF, Loc)};
2531  if (OMPRegionInfo) {
2532  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2533  llvm::Value *Result = CGF.EmitRuntimeCall(
2534  OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2535  OMPRTL___kmpc_cancel_barrier),
2536  Args);
2537  if (EmitChecks) {
2538  // if (__kmpc_cancel_barrier()) {
2539  // exit from construct;
2540  // }
2541  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2542  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2543  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2544  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2545  CGF.EmitBlock(ExitBB);
2546  // exit from construct;
2547  CodeGenFunction::JumpDest CancelDestination =
2548  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2549  CGF.EmitBranchThroughCleanup(CancelDestination);
2550  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2551  }
2552  return;
2553  }
2554  }
2555  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2556  CGM.getModule(), OMPRTL___kmpc_barrier),
2557  Args);
2558 }
2559 
2560 /// Map the OpenMP loop schedule to the runtime enumeration.
2561 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2562  bool Chunked, bool Ordered) {
2563  switch (ScheduleKind) {
2564  case OMPC_SCHEDULE_static:
2565  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2566  : (Ordered ? OMP_ord_static : OMP_sch_static);
2567  case OMPC_SCHEDULE_dynamic:
2568  return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2569  case OMPC_SCHEDULE_guided:
2570  return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2571  case OMPC_SCHEDULE_runtime:
2572  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2573  case OMPC_SCHEDULE_auto:
2574  return Ordered ? OMP_ord_auto : OMP_sch_auto;
2575  case OMPC_SCHEDULE_unknown:
2576  assert(!Chunked && "chunk was specified but schedule kind not known");
2577  return Ordered ? OMP_ord_static : OMP_sch_static;
2578  }
2579  llvm_unreachable("Unexpected runtime schedule");
2580 }
2581 
2582 /// Map the OpenMP distribute schedule to the runtime enumeration.
2583 static OpenMPSchedType
2585  // only static is allowed for dist_schedule
2586  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2587 }
2588 
2590  bool Chunked) const {
2591  OpenMPSchedType Schedule =
2592  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2593  return Schedule == OMP_sch_static;
2594 }
2595 
2597  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2598  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2599  return Schedule == OMP_dist_sch_static;
2600 }
2601 
2603  bool Chunked) const {
2604  OpenMPSchedType Schedule =
2605  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2606  return Schedule == OMP_sch_static_chunked;
2607 }
2608 
2610  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2611  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2612  return Schedule == OMP_dist_sch_static_chunked;
2613 }
2614 
2616  OpenMPSchedType Schedule =
2617  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2618  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2619  return Schedule != OMP_sch_static;
2620 }
2621 
2622 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2625  int Modifier = 0;
2626  switch (M1) {
2627  case OMPC_SCHEDULE_MODIFIER_monotonic:
2628  Modifier = OMP_sch_modifier_monotonic;
2629  break;
2630  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2631  Modifier = OMP_sch_modifier_nonmonotonic;
2632  break;
2633  case OMPC_SCHEDULE_MODIFIER_simd:
2634  if (Schedule == OMP_sch_static_chunked)
2635  Schedule = OMP_sch_static_balanced_chunked;
2636  break;
2639  break;
2640  }
2641  switch (M2) {
2642  case OMPC_SCHEDULE_MODIFIER_monotonic:
2643  Modifier = OMP_sch_modifier_monotonic;
2644  break;
2645  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2646  Modifier = OMP_sch_modifier_nonmonotonic;
2647  break;
2648  case OMPC_SCHEDULE_MODIFIER_simd:
2649  if (Schedule == OMP_sch_static_chunked)
2650  Schedule = OMP_sch_static_balanced_chunked;
2651  break;
2654  break;
2655  }
2656  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2657  // If the static schedule kind is specified or if the ordered clause is
2658  // specified, and if the nonmonotonic modifier is not specified, the effect is
2659  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2660  // modifier is specified, the effect is as if the nonmonotonic modifier is
2661  // specified.
2662  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2663  if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2664  Schedule == OMP_sch_static_balanced_chunked ||
2665  Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2666  Schedule == OMP_dist_sch_static_chunked ||
2667  Schedule == OMP_dist_sch_static))
2668  Modifier = OMP_sch_modifier_nonmonotonic;
2669  }
2670  return Schedule | Modifier;
2671 }
2672 
2674  CodeGenFunction &CGF, SourceLocation Loc,
2675  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2676  bool Ordered, const DispatchRTInput &DispatchValues) {
2677  if (!CGF.HaveInsertPoint())
2678  return;
2679  OpenMPSchedType Schedule = getRuntimeSchedule(
2680  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2681  assert(Ordered ||
2682  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2683  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2684  Schedule != OMP_sch_static_balanced_chunked));
2685  // Call __kmpc_dispatch_init(
2686  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2687  // kmp_int[32|64] lower, kmp_int[32|64] upper,
2688  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2689 
2690  // If the Chunk was not specified in the clause - use default value 1.
2691  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2692  : CGF.Builder.getIntN(IVSize, 1);
2693  llvm::Value *Args[] = {
2694  emitUpdateLocation(CGF, Loc),
2695  getThreadID(CGF, Loc),
2696  CGF.Builder.getInt32(addMonoNonMonoModifier(
2697  CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2698  DispatchValues.LB, // Lower
2699  DispatchValues.UB, // Upper
2700  CGF.Builder.getIntN(IVSize, 1), // Stride
2701  Chunk // Chunk
2702  };
2703  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2704 }
2705 
2707  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2708  llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2710  const CGOpenMPRuntime::StaticRTInput &Values) {
2711  if (!CGF.HaveInsertPoint())
2712  return;
2713 
2714  assert(!Values.Ordered);
2715  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2716  Schedule == OMP_sch_static_balanced_chunked ||
2717  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2718  Schedule == OMP_dist_sch_static ||
2719  Schedule == OMP_dist_sch_static_chunked);
2720 
2721  // Call __kmpc_for_static_init(
2722  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2723  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2724  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2725  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2726  llvm::Value *Chunk = Values.Chunk;
2727  if (Chunk == nullptr) {
2728  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2729  Schedule == OMP_dist_sch_static) &&
2730  "expected static non-chunked schedule");
2731  // If the Chunk was not specified in the clause - use default value 1.
2732  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2733  } else {
2734  assert((Schedule == OMP_sch_static_chunked ||
2735  Schedule == OMP_sch_static_balanced_chunked ||
2736  Schedule == OMP_ord_static_chunked ||
2737  Schedule == OMP_dist_sch_static_chunked) &&
2738  "expected static chunked schedule");
2739  }
2740  llvm::Value *Args[] = {
2741  UpdateLocation,
2742  ThreadId,
2743  CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2744  M2)), // Schedule type
2745  Values.IL.getPointer(), // &isLastIter
2746  Values.LB.getPointer(), // &LB
2747  Values.UB.getPointer(), // &UB
2748  Values.ST.getPointer(), // &Stride
2749  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2750  Chunk // Chunk
2751  };
2752  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2753 }
2754 
2756  SourceLocation Loc,
2757  OpenMPDirectiveKind DKind,
2758  const OpenMPScheduleTy &ScheduleKind,
2759  const StaticRTInput &Values) {
2760  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2761  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2762  assert(isOpenMPWorksharingDirective(DKind) &&
2763  "Expected loop-based or sections-based directive.");
2764  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2765  isOpenMPLoopDirective(DKind)
2766  ? OMP_IDENT_WORK_LOOP
2767  : OMP_IDENT_WORK_SECTIONS);
2768  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2769  llvm::FunctionCallee StaticInitFunction =
2770  createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2771  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2772  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2773  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2774 }
2775 
2777  CodeGenFunction &CGF, SourceLocation Loc,
2778  OpenMPDistScheduleClauseKind SchedKind,
2779  const CGOpenMPRuntime::StaticRTInput &Values) {
2780  OpenMPSchedType ScheduleNum =
2781  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2782  llvm::Value *UpdatedLocation =
2783  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2784  llvm::Value *ThreadId = getThreadID(CGF, Loc);
2785  llvm::FunctionCallee StaticInitFunction;
2786  bool isGPUDistribute =
2787  CGM.getLangOpts().OpenMPIsDevice &&
2788  (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2789  StaticInitFunction = createForStaticInitFunction(
2790  Values.IVSize, Values.IVSigned, isGPUDistribute);
2791 
2792  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2793  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2795 }
2796 
2798  SourceLocation Loc,
2799  OpenMPDirectiveKind DKind) {
2800  if (!CGF.HaveInsertPoint())
2801  return;
2802  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2803  llvm::Value *Args[] = {
2804  emitUpdateLocation(CGF, Loc,
2806  ? OMP_IDENT_WORK_DISTRIBUTE
2807  : isOpenMPLoopDirective(DKind)
2808  ? OMP_IDENT_WORK_LOOP
2809  : OMP_IDENT_WORK_SECTIONS),
2810  getThreadID(CGF, Loc)};
2811  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2812  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2813  (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2814  CGF.EmitRuntimeCall(
2815  OMPBuilder.getOrCreateRuntimeFunction(
2816  CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2817  Args);
2818  else
2819  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2820  CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2821  Args);
2822 }
2823 
2825  SourceLocation Loc,
2826  unsigned IVSize,
2827  bool IVSigned) {
2828  if (!CGF.HaveInsertPoint())
2829  return;
2830  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2831  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2832  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2833 }
2834 
2836  SourceLocation Loc, unsigned IVSize,
2837  bool IVSigned, Address IL,
2838  Address LB, Address UB,
2839  Address ST) {
2840  // Call __kmpc_dispatch_next(
2841  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2842  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2843  // kmp_int[32|64] *p_stride);
2844  llvm::Value *Args[] = {
2845  emitUpdateLocation(CGF, Loc),
2846  getThreadID(CGF, Loc),
2847  IL.getPointer(), // &isLastIter
2848  LB.getPointer(), // &Lower
2849  UB.getPointer(), // &Upper
2850  ST.getPointer() // &Stride
2851  };
2852  llvm::Value *Call =
2853  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2854  return CGF.EmitScalarConversion(
2855  Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2856  CGF.getContext().BoolTy, Loc);
2857 }
2858 
2860  llvm::Value *NumThreads,
2861  SourceLocation Loc) {
2862  if (!CGF.HaveInsertPoint())
2863  return;
2864  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2865  llvm::Value *Args[] = {
2866  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2867  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2868  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2869  CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2870  Args);
2871 }
2872 
2874  ProcBindKind ProcBind,
2875  SourceLocation Loc) {
2876  if (!CGF.HaveInsertPoint())
2877  return;
2878  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2879  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2880  llvm::Value *Args[] = {
2881  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2882  llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2883  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2884  CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2885  Args);
2886 }
2887 
2889  SourceLocation Loc, llvm::AtomicOrdering AO) {
2890  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2891  OMPBuilder.createFlush(CGF.Builder);
2892  } else {
2893  if (!CGF.HaveInsertPoint())
2894  return;
2895  // Build call void __kmpc_flush(ident_t *loc)
2896  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2897  CGM.getModule(), OMPRTL___kmpc_flush),
2898  emitUpdateLocation(CGF, Loc));
2899  }
2900 }
2901 
namespace {
/// Indexes of fields for type kmp_task_t.
/// The order here must match the field order built by
/// createKmpTaskTRecordDecl below, since code such as emitProxyTaskFunction
/// addresses fields via std::next(field_begin(), <index>).
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2927 
2929  // If we are in simd mode or there are no entries, we don't need to do
2930  // anything.
2931  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
2932  return;
2933 
2934  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2935  [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2936  const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2937  SourceLocation Loc;
2938  if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2939  for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2941  I != E; ++I) {
2942  if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
2943  I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
2945  I->getFirst(), EntryInfo.Line, 1);
2946  break;
2947  }
2948  }
2949  }
2950  switch (Kind) {
2951  case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2952  unsigned DiagID = CGM.getDiags().getCustomDiagID(
2953  DiagnosticsEngine::Error, "Offloading entry for target region in "
2954  "%0 is incorrect: either the "
2955  "address or the ID is invalid.");
2956  CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2957  } break;
2958  case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2959  unsigned DiagID = CGM.getDiags().getCustomDiagID(
2960  DiagnosticsEngine::Error, "Offloading entry for declare target "
2961  "variable %0 is incorrect: the "
2962  "address is invalid.");
2963  CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2964  } break;
2965  case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2966  unsigned DiagID = CGM.getDiags().getCustomDiagID(
2968  "Offloading entry for declare target variable is incorrect: the "
2969  "address is invalid.");
2970  CGM.getDiags().Report(DiagID);
2971  } break;
2972  }
2973  };
2974 
2975  OMPBuilder.createOffloadEntriesAndInfoMetadata(OffloadEntriesInfoManager,
2976  ErrorReportFn);
2977 }
2978 
2979 /// Loads all the offload entries information from the host IR
2980 /// metadata.
2982  // If we are in target mode, load the metadata from the host IR. This code has
2983  // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
2984 
2985  if (!CGM.getLangOpts().OpenMPIsDevice)
2986  return;
2987 
2988  if (CGM.getLangOpts().OMPHostIRFile.empty())
2989  return;
2990 
2991  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
2992  if (auto EC = Buf.getError()) {
2993  CGM.getDiags().Report(diag::err_cannot_open_file)
2994  << CGM.getLangOpts().OMPHostIRFile << EC.message();
2995  return;
2996  }
2997 
2998  llvm::LLVMContext C;
2999  auto ME = expectedToErrorOrAndEmitErrors(
3000  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3001 
3002  if (auto EC = ME.getError()) {
3003  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3004  DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3005  CGM.getDiags().Report(DiagID)
3006  << CGM.getLangOpts().OMPHostIRFile << EC.message();
3007  return;
3008  }
3009 
3010  OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager);
3011 }
3012 
3014  if (!KmpRoutineEntryPtrTy) {
3015  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3016  ASTContext &C = CGM.getContext();
3017  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3019  KmpRoutineEntryPtrQTy = C.getPointerType(
3020  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3022  }
3023 }
3024 
3025 namespace {
3026 struct PrivateHelpersTy {
3027  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3028  const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3029  : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3030  PrivateElemInit(PrivateElemInit) {}
3031  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3032  const Expr *OriginalRef = nullptr;
3033  const VarDecl *Original = nullptr;
3034  const VarDecl *PrivateCopy = nullptr;
3035  const VarDecl *PrivateElemInit = nullptr;
3036  bool isLocalPrivate() const {
3037  return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3038  }
3039 };
3040 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3041 } // anonymous namespace
3042 
3043 static bool isAllocatableDecl(const VarDecl *VD) {
3044  const VarDecl *CVD = VD->getCanonicalDecl();
3045  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3046  return false;
3047  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3048  // Use the default allocation.
3049  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3050  !AA->getAllocator());
3051 }
3052 
3053 static RecordDecl *
3055  if (!Privates.empty()) {
3056  ASTContext &C = CGM.getContext();
3057  // Build struct .kmp_privates_t. {
3058  // /* private vars */
3059  // };
3060  RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3061  RD->startDefinition();
3062  for (const auto &Pair : Privates) {
3063  const VarDecl *VD = Pair.second.Original;
3065  // If the private variable is a local variable with lvalue ref type,
3066  // allocate the pointer instead of the pointee type.
3067  if (Pair.second.isLocalPrivate()) {
3068  if (VD->getType()->isLValueReferenceType())
3069  Type = C.getPointerType(Type);
3070  if (isAllocatableDecl(VD))
3071  Type = C.getPointerType(Type);
3072  }
3073  FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3074  if (VD->hasAttrs()) {
3075  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3076  E(VD->getAttrs().end());
3077  I != E; ++I)
3078  FD->addAttr(*I);
3079  }
3080  }
3081  RD->completeDefinition();
3082  return RD;
3083  }
3084  return nullptr;
3085 }
3086 
3087 static RecordDecl *
3089  QualType KmpInt32Ty,
3090  QualType KmpRoutineEntryPointerQTy) {
3091  ASTContext &C = CGM.getContext();
3092  // Build struct kmp_task_t {
3093  // void * shareds;
3094  // kmp_routine_entry_t routine;
3095  // kmp_int32 part_id;
3096  // kmp_cmplrdata_t data1;
3097  // kmp_cmplrdata_t data2;
3098  // For taskloops additional fields:
3099  // kmp_uint64 lb;
3100  // kmp_uint64 ub;
3101  // kmp_int64 st;
3102  // kmp_int32 liter;
3103  // void * reductions;
3104  // };
3105  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3106  UD->startDefinition();
3107  addFieldToRecordDecl(C, UD, KmpInt32Ty);
3108  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3109  UD->completeDefinition();
3110  QualType KmpCmplrdataTy = C.getRecordType(UD);
3111  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3112  RD->startDefinition();
3113  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3114  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3115  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3116  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3117  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3119  QualType KmpUInt64Ty =
3120  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3121  QualType KmpInt64Ty =
3122  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3123  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3124  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3125  addFieldToRecordDecl(C, RD, KmpInt64Ty);
3126  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3127  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3128  }
3129  RD->completeDefinition();
3130  return RD;
3131 }
3132 
3133 static RecordDecl *
3135  ArrayRef<PrivateDataTy> Privates) {
3136  ASTContext &C = CGM.getContext();
3137  // Build struct kmp_task_t_with_privates {
3138  // kmp_task_t task_data;
3139  // .kmp_privates_t. privates;
3140  // };
3141  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3142  RD->startDefinition();
3143  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3144  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3145  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3146  RD->completeDefinition();
3147  return RD;
3148 }
3149 
3150 /// Emit a proxy function which accepts kmp_task_t as the second
3151 /// argument.
3152 /// \code
3153 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3154 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3155 /// For taskloops:
3156 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3157 /// tt->reductions, tt->shareds);
3158 /// return 0;
3159 /// }
3160 /// \endcode
3161 static llvm::Function *
3163  OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3164  QualType KmpTaskTWithPrivatesPtrQTy,
3165  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3166  QualType SharedsPtrTy, llvm::Function *TaskFunction,
3167  llvm::Value *TaskPrivatesMap) {
3168  ASTContext &C = CGM.getContext();
3169  FunctionArgList Args;
3170  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3172  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3173  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3175  Args.push_back(&GtidArg);
3176  Args.push_back(&TaskTypeArg);
3177  const auto &TaskEntryFnInfo =
3178  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3179  llvm::FunctionType *TaskEntryTy =
3180  CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3181  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3182  auto *TaskEntry = llvm::Function::Create(
3183  TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3184  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3185  TaskEntry->setDoesNotRecurse();
3186  CodeGenFunction CGF(CGM);
3187  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3188  Loc, Loc);
3189 
3190  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3191  // tt,
3192  // For taskloops:
3193  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3194  // tt->task_data.shareds);
3195  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3196  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3197  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3198  CGF.GetAddrOfLocalVar(&TaskTypeArg),
3199  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3200  const auto *KmpTaskTWithPrivatesQTyRD =
3201  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3202  LValue Base =
3203  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3204  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3205  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3206  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3207  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3208 
3209  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3210  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3211  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3212  CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3213  CGF.ConvertTypeForMem(SharedsPtrTy));
3214 
3215  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3216  llvm::Value *PrivatesParam;
3217  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3218  LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3219  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3220  PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3221  } else {
3222  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3223  }
3224 
3225  llvm::Value *CommonArgs[] = {
3226  GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3227  CGF.Builder
3229  CGF.VoidPtrTy, CGF.Int8Ty)
3230  .getPointer()};
3231  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3232  std::end(CommonArgs));
3234  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3235  LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3236  llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3237  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3238  LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3239  llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3240  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3241  LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3242  llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3243  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3244  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3245  llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3246  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3247  LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3248  llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3249  CallArgs.push_back(LBParam);
3250  CallArgs.push_back(UBParam);
3251  CallArgs.push_back(StParam);
3252  CallArgs.push_back(LIParam);
3253  CallArgs.push_back(RParam);
3254  }
3255  CallArgs.push_back(SharedsParam);
3256 
3257  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3258  CallArgs);
3259  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3260  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3261  CGF.FinishFunction();
3262  return TaskEntry;
3263 }
3264 
// Emits the internal task-destructor helper
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt)
// which walks the privates sub-record of the task descriptor and pushes a
// destroy cleanup for every field whose type has a non-trivial destruction
// kind.
// NOTE(review): this rendering drops original lines 3273, 3276 and 3295
// (presumably the ImplicitParamDecl parameter-kind arguments and the start of
// the EmitLoadOfPointerLValue call that defines `Base`) — restore from
// upstream before compiling.
3265 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3266  SourceLocation Loc,
3267  QualType KmpInt32Ty,
3268  QualType KmpTaskTWithPrivatesPtrQTy,
3269  QualType KmpTaskTWithPrivatesQTy) {
3270  ASTContext &C = CGM.getContext();
// Argument list: global thread id plus restrict-qualified pointer to the
// task-with-privates record.
3271  FunctionArgList Args;
3272  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3274  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3275  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3277  Args.push_back(&GtidArg);
3278  Args.push_back(&TaskTypeArg);
3279  const auto &DestructorFnInfo =
3280  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3281  llvm::FunctionType *DestructorFnTy =
3282  CGM.getTypes().GetFunctionType(DestructorFnInfo);
3283  std::string Name =
3284  CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
// The helper is TU-local: it is only ever referenced through the task
// descriptor, never by name from other modules.
3285  auto *DestructorFn =
3286  llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3287  Name, &CGM.getModule());
3288  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3289  DestructorFnInfo);
3290  DestructorFn->setDoesNotRecurse();
3291  CodeGenFunction CGF(CGM);
3292  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3293  Args, Loc, Loc);
3294 
// Load the task record pointer, step to the second field (the privates
// struct), and push destroy cleanups for destructible fields; the cleanups
// run when FinishFunction pops the function scope.
3296  CGF.GetAddrOfLocalVar(&TaskTypeArg),
3297  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3298  const auto *KmpTaskTWithPrivatesQTyRD =
3299  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3300  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3301  Base = CGF.EmitLValueForField(Base, *FI);
3302  for (const auto *Field :
3303  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3304  if (QualType::DestructionKind DtorKind =
3305  Field->getType().isDestructedType()) {
3306  LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3307  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3308  }
3309  }
3310  CGF.FinishFunction();
3311  return DestructorFn;
3312 }
3313 
3314 /// Emit a privates mapping function for correct handling of private and
3315 /// firstprivate variables.
3316 /// \code
3317 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3318 /// **noalias priv1,..., <tyn> **noalias privn) {
3319 /// *priv1 = &.privates.priv1;
3320 /// ...;
3321 /// *privn = &.privates.privn;
3322 /// }
3323 /// \endcode
// NOTE(review): this rendering drops original lines 3325 (the function name
// line, presumably `emitTaskPrivateMappingFunction(CodeGenModule &CGM,
// SourceLocation Loc,`), 3333, 3343, 3354, 3365, 3379 (the ImplicitParamDecl
// kind arguments) and 3404 (the start of the EmitLoadOfPointerLValue call
// defining `Base`) — restore from upstream before compiling.
3324 static llvm::Value *
3326  const OMPTaskDataTy &Data, QualType PrivatesQTy,
3327  ArrayRef<PrivateDataTy> Privates) {
3328  ASTContext &C = CGM.getContext();
// First parameter: const/restrict pointer to the aggregated privates record.
3329  FunctionArgList Args;
3330  ImplicitParamDecl TaskPrivatesArg(
3331  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3332  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3334  Args.push_back(&TaskPrivatesArg);
// One extra pointer-to-pointer out-parameter is appended per private,
// firstprivate, lastprivate and allocatable local variable. PrivateVarsPos
// records, for each original VarDecl, the 1-based index of its out-parameter
// so the field loop below can pair record fields with parameters.
3335  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3336  unsigned Counter = 1;
3337  for (const Expr *E : Data.PrivateVars) {
3338  Args.push_back(ImplicitParamDecl::Create(
3339  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3340  C.getPointerType(C.getPointerType(E->getType()))
3341  .withConst()
3342  .withRestrict(),
3344  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3345  PrivateVarsPos[VD] = Counter;
3346  ++Counter;
3347  }
3348  for (const Expr *E : Data.FirstprivateVars) {
3349  Args.push_back(ImplicitParamDecl::Create(
3350  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3351  C.getPointerType(C.getPointerType(E->getType()))
3352  .withConst()
3353  .withRestrict(),
3355  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3356  PrivateVarsPos[VD] = Counter;
3357  ++Counter;
3358  }
3359  for (const Expr *E : Data.LastprivateVars) {
3360  Args.push_back(ImplicitParamDecl::Create(
3361  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3362  C.getPointerType(C.getPointerType(E->getType()))
3363  .withConst()
3364  .withRestrict(),
3366  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3367  PrivateVarsPos[VD] = Counter;
3368  ++Counter;
3369  }
// Private locals: references are passed as pointers, and allocatable decls
// get one more level of indirection.
3370  for (const VarDecl *VD : Data.PrivateLocals) {
3371  QualType Ty = VD->getType().getNonReferenceType();
3372  if (VD->getType()->isLValueReferenceType())
3373  Ty = C.getPointerType(Ty);
3374  if (isAllocatableDecl(VD))
3375  Ty = C.getPointerType(Ty);
3376  Args.push_back(ImplicitParamDecl::Create(
3377  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3378  C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3380  PrivateVarsPos[VD] = Counter;
3381  ++Counter;
3382  }
3383  const auto &TaskPrivatesMapFnInfo =
3384  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3385  llvm::FunctionType *TaskPrivatesMapTy =
3386  CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3387  std::string Name =
3388  CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3389  auto *TaskPrivatesMap = llvm::Function::Create(
3390  TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3391  &CGM.getModule());
3392  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3393  TaskPrivatesMapFnInfo);
// In optimized builds the map function is trivial; force-inline it so the
// runtime callback overhead disappears.
3394  if (CGM.getLangOpts().Optimize) {
3395  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3396  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3397  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3398  }
3399  CodeGenFunction CGF(CGM);
3400  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3401  TaskPrivatesMapFnInfo, Args, Loc, Loc);
3402 
3403  // *privi = &.privates.privi;
3405  CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3406  TaskPrivatesArg.getType()->castAs<PointerType>());
3407  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
// Walk the record fields in declaration order; Privates is expected to be in
// the same (alignment-sorted) order, so Privates[Counter] matches Field.
3408  Counter = 0;
3409  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3410  LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3411  const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3412  LValue RefLVal =
3413  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3414  LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3415  RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3416  CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3417  ++Counter;
3418  }
3419  CGF.FinishFunction();
3420  return TaskPrivatesMap;
3421 }
3422 
3423 /// Emit initialization for private variables in task-based directives.
// Copies/constructs the task's private copies inside the privates sub-record
// of the task descriptor. \p ForDup distinguishes the task_dup path (taskloop
// re-initialization from the source task's shareds) from initial task
// creation.
// NOTE(review): this rendering drops original lines 3424 (the signature start,
// presumably `static void emitPrivatesInit(CodeGenFunction &CGF,`), 3434
// (the `OpenMPDirectiveKind Kind = ...` condition), 3441-3442 (the
// initializer of `IsTargetTask`), 3449 and 3510 (the address-cast and
// EmitOMPAggregateAssign call starts) — restore from upstream before
// compiling.
3425  const OMPExecutableDirective &D,
3426  Address KmpTaskSharedsPtr, LValue TDBase,
3427  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3428  QualType SharedsTy, QualType SharedsPtrTy,
3429  const OMPTaskDataTy &Data,
3430  ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3431  ASTContext &C = CGF.getContext();
// The privates struct is the second field of kmp_task_t_with_privates.
3432  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3433  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3435  ? OMPD_taskloop
3436  : OMPD_task;
3437  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3438  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3439  LValue SrcBase;
3440  bool IsTargetTask =
3443  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3444  // PointersArray, SizesArray, and MappersArray. The original variables for
3445  // these arrays are not captured and we get their addresses explicitly.
3446  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3447  (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3448  SrcBase = CGF.MakeAddrLValue(
3450  KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3451  CGF.ConvertTypeForMem(SharedsTy)),
3452  SharedsTy);
3453  }
// Re-point FI at the first field *inside* the privates struct; it is
// advanced in lockstep with the Privates entries below.
3454  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3455  for (const PrivateDataTy &Pair : Privates) {
3456  // Do not initialize private locals.
3457  if (Pair.second.isLocalPrivate()) {
3458  ++FI;
3459  continue;
3460  }
3461  const VarDecl *VD = Pair.second.PrivateCopy;
3462  const Expr *Init = VD->getAnyInitializer();
// In the dup path only non-trivial construct-expressions need to be re-run;
// everything else was already copied when the source task was created.
3463  if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3464  !CGF.isTrivialInitializer(Init)))) {
3465  LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
// A non-null PrivateElemInit marks a firstprivate: the initializer reads
// from the shared original, so locate that source lvalue first.
3466  if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3467  const VarDecl *OriginalVD = Pair.second.Original;
3468  // Check if the variable is the target-based BasePointersArray,
3469  // PointersArray, SizesArray, or MappersArray.
3470  LValue SharedRefLValue;
3471  QualType Type = PrivateLValue.getType();
3472  const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3473  if (IsTargetTask && !SharedField) {
3474  assert(isa<ImplicitParamDecl>(OriginalVD) &&
3475  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3476  cast<CapturedDecl>(OriginalVD->getDeclContext())
3477  ->getNumParams() == 0 &&
3478  isa<TranslationUnitDecl>(
3479  cast<CapturedDecl>(OriginalVD->getDeclContext())
3480  ->getDeclContext()) &&
3481  "Expected artificial target data variable.");
3482  SharedRefLValue =
3483  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3484  } else if (ForDup) {
// Dup path: read from the source task's shareds, re-aligned to the
// original declaration's alignment.
3485  SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3486  SharedRefLValue = CGF.MakeAddrLValue(
3487  SharedRefLValue.getAddress(CGF).withAlignment(
3488  C.getDeclAlign(OriginalVD)),
3489  SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3490  SharedRefLValue.getTBAAInfo());
3491  } else if (CGF.LambdaCaptureFields.count(
3492  Pair.second.Original->getCanonicalDecl()) > 0 ||
3493  isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3494  SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3495  } else {
3496  // Processing for implicitly captured variables.
3497  InlinedOpenMPRegionRAII Region(
3498  CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3499  /*HasCancel=*/false, /*NoInheritance=*/true);
3500  SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3501  }
3502  if (Type->isArrayType()) {
3503  // Initialize firstprivate array.
3504  if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3505  // Perform simple memcpy.
3506  CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3507  } else {
3508  // Initialize firstprivate array using element-by-element
3509  // initialization.
3511  PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3512  Type,
3513  [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3514  Address SrcElement) {
3515  // Clean up any temporaries needed by the initialization.
3516  CodeGenFunction::OMPPrivateScope InitScope(CGF);
3517  InitScope.addPrivate(Elem, SrcElement);
3518  (void)InitScope.Privatize();
3519  // Emit initialization for single element.
3520  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3521  CGF, &CapturesInfo);
3522  CGF.EmitAnyExprToMem(Init, DestElement,
3523  Init->getType().getQualifiers(),
3524  /*IsInitializer=*/false);
3525  });
3526  }
3527  } else {
// Scalar/record firstprivate: bind Elem to the shared source and run the
// initializer expression into the private field.
3528  CodeGenFunction::OMPPrivateScope InitScope(CGF);
3529  InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3530  (void)InitScope.Privatize();
3531  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3532  CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3533  /*capturedByInit=*/false);
3534  }
3535  } else {
// Plain private: just run its default initializer.
3536  CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3537  }
3538  }
3539  ++FI;
3540  }
3541 }
3542 
3543 /// Check if duplication function is required for taskloops.
// \return true iff at least one non-local private copy has a non-trivial
// CXXConstructExpr initializer (local privates are never initialized here).
// NOTE(review): this rendering drops original line 3544 (presumably
// `static bool checkInitIsRequired(CodeGenFunction &CGF,`) — restore from
// upstream before compiling.
3545  ArrayRef<PrivateDataTy> Privates) {
3546  bool InitRequired = false;
3547  for (const PrivateDataTy &Pair : Privates) {
3548  if (Pair.second.isLocalPrivate())
3549  continue;
3550  const VarDecl *VD = Pair.second.PrivateCopy;
3551  const Expr *Init = VD->getAnyInitializer();
3552  InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3553  !CGF.isTrivialInitializer(Init));
// Early exit: one non-trivial initializer is enough to require a dup fn.
3554  if (InitRequired)
3555  break;
3556  }
3557  return InitRequired;
3558 }
3559 
3560 
3561 /// Emit task_dup function (for initialization of
3562 /// private/firstprivate/lastprivate vars and last_iter flag)
3563 /// \code
3564 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3565 /// lastpriv) {
3566 /// // setup lastprivate flag
3567 /// task_dst->last = lastpriv;
3568 /// // could be constructor calls here...
3569 /// }
3570 /// \endcode
// NOTE(review): this rendering drops original lines 3572 (the function name
// line, presumably `emitTaskDupFunction(CodeGenModule &CGM, SourceLocation
// Loc,`), 3583/3586/3588 (ImplicitParamDecl kind arguments), 3610 and 3625
// (the `LValue Base = CGF.EmitLValueForField(` starts) and 3628 (the
// EmitLoadOfScalar start) — restore from upstream before compiling.
3571 static llvm::Value *
3573  const OMPExecutableDirective &D,
3574  QualType KmpTaskTWithPrivatesPtrQTy,
3575  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3576  const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3577  QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3578  ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3579  ASTContext &C = CGM.getContext();
// Arguments: destination task, source task, lastprivate flag.
3580  FunctionArgList Args;
3581  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3582  KmpTaskTWithPrivatesPtrQTy,
3584  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3585  KmpTaskTWithPrivatesPtrQTy,
3587  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3589  Args.push_back(&DstArg);
3590  Args.push_back(&SrcArg);
3591  Args.push_back(&LastprivArg);
3592  const auto &TaskDupFnInfo =
3593  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3594  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3595  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3596  auto *TaskDup = llvm::Function::Create(
3597  TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3598  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3599  TaskDup->setDoesNotRecurse();
3600  CodeGenFunction CGF(CGM);
3601  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3602  Loc);
3603 
3604  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3605  CGF.GetAddrOfLocalVar(&DstArg),
3606  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3607  // task_dst->liter = lastpriv;
3608  if (WithLastIter) {
3609  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3611  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3612  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3613  llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3614  CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3615  CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3616  }
3617 
3618  // Emit initial values for private copies (if any).
3619  assert(!Privates.empty());
3620  Address KmpTaskSharedsPtr = Address::invalid();
// Firstprivates are re-initialized from the *source* task's shareds pointer.
3621  if (!Data.FirstprivateVars.empty()) {
3622  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3623  CGF.GetAddrOfLocalVar(&SrcArg),
3624  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3626  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3627  KmpTaskSharedsPtr = Address(
3629  Base, *std::next(KmpTaskTQTyRD->field_begin(),
3630  KmpTaskTShareds)),
3631  Loc),
3632  CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3633  }
3634  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3635  SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3636  CGF.FinishFunction();
3637  return TaskDup;
3638 }
3639 
3640 /// Checks if destructor function is required to be generated.
3641 /// \return true if cleanups are required, false otherwise.
3642 static bool
3643 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3644  ArrayRef<PrivateDataTy> Privates) {
3645  for (const PrivateDataTy &P : Privates) {
3646  if (P.second.isLocalPrivate())
3647  continue;
3648  QualType Ty = P.second.Original->getType().getNonReferenceType();
3649  if (Ty.isDestructedType())
3650  return true;
3651  }
3652  return false;
3653 }
3654 
3655 namespace {
3656 /// Loop generator for OpenMP iterator expression.
// RAII helper: the constructor privatizes every iterator/counter variable and
// opens one nested counting loop per iterator; the destructor emits the
// counter increments, the back-edges and the exit blocks in reverse order,
// so code emitted between construction and destruction runs inside the
// innermost loop body.
// NOTE(review): this rendering drops original lines 3658 (the base-class
// clause, presumably `: public CodeGenFunction::OMPPrivateScope {`),
// 3661-3662 (the SmallVector members holding Uppers/ContDests/ExitDests),
// 3671 and 3704 (the signedness query on CounterVD's type) — restore from
// upstream before compiling.
3657 class OMPIteratorGeneratorScope final
3659  CodeGenFunction &CGF;
3660  const OMPIteratorExpr *E = nullptr;
// Non-copyable, non-default-constructible: the scope is tied to one CGF.
3663  OMPIteratorGeneratorScope() = delete;
3664  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3665 
3666 public:
3667  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3668  : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
// A null iterator expression makes the whole scope a no-op.
3669  if (!E)
3670  return;
3672  for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3673  Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3674  const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3675  addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3676  const OMPIteratorHelperData &HelperData = E->getHelper(I);
3677  addPrivate(
3678  HelperData.CounterVD,
3679  CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3680  }
3681  Privatize();
3682 
3683  for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3684  const OMPIteratorHelperData &HelperData = E->getHelper(I);
3685  LValue CLVal =
3686  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3687  HelperData.CounterVD->getType());
3688  // Counter = 0;
3689  CGF.EmitStoreOfScalar(
3690  llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3691  CLVal);
3692  CodeGenFunction::JumpDest &ContDest =
3693  ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"))
3694  CodeGenFunction::JumpDest &ExitDest =
3695  ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3696  // N = <number-of_iterations>;
3697  llvm::Value *N = Uppers[I];
3698  // cont:
3699  // if (Counter < N) goto body; else goto exit;
3700  CGF.EmitBlock(ContDest.getBlock());
3701  auto *CVal =
3702  CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
// Signed vs unsigned compare chosen by the counter variable's type.
3703  llvm::Value *Cmp =
3705  ? CGF.Builder.CreateICmpSLT(CVal, N)
3706  : CGF.Builder.CreateICmpULT(CVal, N);
3707  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3708  CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3709  // body:
3710  CGF.EmitBlock(BodyBB);
3711  // Iteri = Begini + Counter * Stepi;
3712  CGF.EmitIgnoredExpr(HelperData.Update);
3713  }
3714  }
3715  ~OMPIteratorGeneratorScope() {
3716  if (!E)
3717  return;
// Close the loops innermost-first, mirroring the constructor.
3718  for (unsigned I = E->numOfIterators(); I > 0; --I) {
3719  // Counter = Counter + 1;
3720  const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3721  CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3722  // goto cont;
3723  CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3724  // exit:
3725  CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3726  }
3727  }
3728 };
3729 } // namespace
3730 
// Returns the base address and the size in bytes of the storage denoted by
// \p E, handling three cases: array-shaping expressions (element size times
// each dimension), array sections (upper bound address minus base address),
// and plain lvalues (sizeof the expression's type).
// NOTE(review): this rendering drops original line 3732 (presumably
// `getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {`) — restore from
// upstream before compiling.
3731 static std::pair<llvm::Value *, llvm::Value *>
3733  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3734  llvm::Value *Addr;
3735  if (OASE) {
3736  const Expr *Base = OASE->getBase();
3737  Addr = CGF.EmitScalarExpr(Base);
3738  } else {
3739  Addr = CGF.EmitLValue(E).getPointer(CGF);
3740  }
3741  llvm::Value *SizeVal;
3742  QualType Ty = E->getType();
3743  if (OASE) {
// Shaped array: size = sizeof(element) * dim0 * dim1 * ...; each dimension
// is converted to size_t before the multiply.
3744  SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3745  for (const Expr *SE : OASE->getDimensions()) {
3746  llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3747  Sz = CGF.EmitScalarConversion(
3748  Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3749  SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3750  }
3751  } else if (const auto *ASE =
3752  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
// Array section: size = (&section_end + 1) - &section_begin, computed via
// ptrtoint subtraction in size_t.
3753  LValue UpAddrLVal =
3754  CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3755  Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3756  llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3757  UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3758  llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3759  llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3760  SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3761  } else {
3762  SizeVal = CGF.getTypeSize(Ty);
3763  }
3764  return std::make_pair(Addr, SizeVal);
3765 }
3766 
3767 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3768 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3769  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3770  if (KmpTaskAffinityInfoTy.isNull()) {
3771  RecordDecl *KmpAffinityInfoRD =
3772  C.buildImplicitRecord("kmp_task_affinity_info_t");
3773  KmpAffinityInfoRD->startDefinition();
3774  addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3775  addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3776  addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3777  KmpAffinityInfoRD->completeDefinition();
3778  KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3779  }
3780 }
3781 
3784  const OMPExecutableDirective &D,
3785  llvm::Function *TaskFunction, QualType SharedsTy,
3786  Address Shareds, const OMPTaskDataTy &Data) {
3787  ASTContext &C = CGM.getContext();
3789  // Aggregate privates and sort them by the alignment.
3790  const auto *I = Data.PrivateCopies.begin();
3791  for (const Expr *E : Data.PrivateVars) {
3792  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3793  Privates.emplace_back(
3794  C.getDeclAlign(VD),
3795  PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3796  /*PrivateElemInit=*/nullptr));
3797  ++I;
3798  }
3799  I = Data.FirstprivateCopies.begin();
3800  const auto *IElemInitRef = Data.FirstprivateInits.begin();
3801  for (const Expr *E : Data.FirstprivateVars) {
3802  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3803  Privates.emplace_back(
3804  C.getDeclAlign(VD),
3805  PrivateHelpersTy(
3806  E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3807  cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3808  ++I;
3809  ++IElemInitRef;
3810  }
3811  I = Data.LastprivateCopies.begin();
3812  for (const Expr *E : Data.LastprivateVars) {
3813  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3814  Privates.emplace_back(
3815  C.getDeclAlign(VD),
3816  PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3817  /*PrivateElemInit=*/nullptr));
3818  ++I;
3819  }
3820  for (const VarDecl *VD : Data.PrivateLocals) {
3821  if (isAllocatableDecl(VD))
3822  Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3823  else
3824  Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3825  }
3826  llvm::stable_sort(Privates,
3827  [](const PrivateDataTy &L, const PrivateDataTy &R) {
3828  return L.first > R.first;
3829  });
3830  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3831  // Build type kmp_routine_entry_t (if not built yet).
3832  emitKmpRoutineEntryT(KmpInt32Ty);
3833  // Build type kmp_task_t (if not built yet).
3835  if (SavedKmpTaskloopTQTy.isNull()) {
3837  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3838  }
3840  } else {
3841  assert((D.getDirectiveKind() == OMPD_task ||
3844  "Expected taskloop, task or target directive");
3845  if (SavedKmpTaskTQTy.isNull()) {
3846  SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3847  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3848  }
3850  }
3851  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3852  // Build particular struct kmp_task_t for the given task.
3853  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3855  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3856  QualType KmpTaskTWithPrivatesPtrQTy =
3857  C.getPointerType(KmpTaskTWithPrivatesQTy);
3858  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3859  llvm::Type *KmpTaskTWithPrivatesPtrTy =
3860  KmpTaskTWithPrivatesTy->getPointerTo();
3861  llvm::Value *KmpTaskTWithPrivatesTySize =
3862  CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3863  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3864 
3865  // Emit initial values for private copies (if any).
3866  llvm::Value *TaskPrivatesMap = nullptr;
3867  llvm::Type *TaskPrivatesMapTy =
3868  std::next(TaskFunction->arg_begin(), 3)->getType();
3869  if (!Privates.empty()) {
3870  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3871  TaskPrivatesMap =
3872  emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3873  TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3874  TaskPrivatesMap, TaskPrivatesMapTy);
3875  } else {
3876  TaskPrivatesMap = llvm::ConstantPointerNull::get(
3877  cast<llvm::PointerType>(TaskPrivatesMapTy));
3878  }
3879  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3880  // kmp_task_t *tt);
3881  llvm::Function *TaskEntry = emitProxyTaskFunction(
3882  CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3883  KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3884  TaskPrivatesMap);
3885 
3886  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3887  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3888  // kmp_routine_entry_t *task_entry);
3889  // Task flags. Format is taken from
3890  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3891  // description of kmp_tasking_flags struct.
3892  enum {
3893  TiedFlag = 0x1,
3894  FinalFlag = 0x2,
3895  DestructorsFlag = 0x8,
3896  PriorityFlag = 0x20,
3897  DetachableFlag = 0x40,
3898  };
3899  unsigned Flags = Data.Tied ? TiedFlag : 0;
3900  bool NeedsCleanup = false;
3901  if (!Privates.empty()) {
3902  NeedsCleanup =
3903  checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3904  if (NeedsCleanup)
3905  Flags = Flags | DestructorsFlag;
3906  }
3907  if (Data.Priority.getInt())
3908  Flags = Flags | PriorityFlag;
3910  Flags = Flags | DetachableFlag;
3911  llvm::Value *TaskFlags =
3912  Data.Final.getPointer()
3913  ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3914  CGF.Builder.getInt32(FinalFlag),
3915  CGF.Builder.getInt32(/*C=*/0))
3916  : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3917  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3918  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3919  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3920  getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3922  TaskEntry, KmpRoutineEntryPtrTy)};
3923  llvm::Value *NewTask;
3924  if (D.hasClausesOfKind<OMPNowaitClause>()) {
3925  // Check if we have any device clause associated with the directive.
3926  const Expr *Device = nullptr;
3927  if (auto *C = D.getSingleClause<OMPDeviceClause>())
3928  Device = C->getDevice();
3929  // Emit device ID if any otherwise use default value.
3930  llvm::Value *DeviceID;
3931  if (Device)
3932  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3933  CGF.Int64Ty, /*isSigned=*/true);
3934  else
3935  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3936  AllocArgs.push_back(DeviceID);
3937  NewTask = CGF.EmitRuntimeCall(
3938  OMPBuilder.getOrCreateRuntimeFunction(
3939  CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3940  AllocArgs);
3941  } else {
3942  NewTask =
3943  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3944  CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3945  AllocArgs);
3946  }
3947  // Emit detach clause initialization.
3948  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3949  // task_descriptor);
3950  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3951  const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3952  LValue EvtLVal = CGF.EmitLValue(Evt);
3953 
3954  // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3955  // int gtid, kmp_task_t *task);
3956  llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3957  llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3958  Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3959  llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3960  OMPBuilder.getOrCreateRuntimeFunction(
3961  CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3962  {Loc, Tid, NewTask});
3963  EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3964  Evt->getExprLoc());
3965  CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3966  }
3967  // Process affinity clauses.
3969  // Process list of affinity data.
3970  ASTContext &C = CGM.getContext();
3971  Address AffinitiesArray = Address::invalid();
3972  // Calculate number of elements to form the array of affinity data.
3973  llvm::Value *NumOfElements = nullptr;
3974  unsigned NumAffinities = 0;
3975  for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3976  if (const Expr *Modifier = C->getModifier()) {
3977  const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3978  for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3979  llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3980  Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3981  NumOfElements =
3982  NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3983  }
3984  } else {
3985  NumAffinities += C->varlist_size();
3986  }
3987  }
3989  // Fields ids in kmp_task_affinity_info record.
3990  enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3991 
3992  QualType KmpTaskAffinityInfoArrayTy;
3993  if (NumOfElements) {
3994  NumOfElements = CGF.Builder.CreateNUWAdd(
3995  llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
3996  auto *OVE = new (C) OpaqueValueExpr(
3997  Loc,
3998  C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
3999  VK_PRValue);
4000  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4001  RValue::get(NumOfElements));
4002  KmpTaskAffinityInfoArrayTy =
4003  C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4004  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4005  // Properly emit variable-sized array.
4006  auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4008  CGF.EmitVarDecl(*PD);
4009  AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4010  NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4011  /*isSigned=*/false);
4012  } else {
4013  KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4015  llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4016  ArrayType::Normal, /*IndexTypeQuals=*/0);
4017  AffinitiesArray =
4018  CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4019  AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4020  NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4021  /*isSigned=*/false);
4022  }
4023 
4024  const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4025  // Fill array by elements without iterators.
4026  unsigned Pos = 0;
4027  bool HasIterator = false;
4028  for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4029  if (C->getModifier()) {
4030  HasIterator = true;
4031  continue;
4032  }
4033  for (const Expr *E : C->varlists()) {
4034  llvm::Value *Addr;
4035  llvm::Value *Size;
4036  std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4037  LValue Base =
4038  CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4040  // affs[i].base_addr = &<Affinities[i].second>;
4041  LValue BaseAddrLVal = CGF.EmitLValueForField(
4042  Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4043  CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4044  BaseAddrLVal);
4045  // affs[i].len = sizeof(<Affinities[i].second>);
4046  LValue LenLVal = CGF.EmitLValueForField(
4047  Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4048  CGF.EmitStoreOfScalar(Size, LenLVal);
4049  ++Pos;
4050  }
4051  }
4052  LValue PosLVal;
4053  if (HasIterator) {
4054  PosLVal = CGF.MakeAddrLValue(
4055  CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4056  C.getSizeType());
4057  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4058  }
4059  // Process elements with iterators.
4060  for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4061  const Expr *Modifier = C->getModifier();
4062  if (!Modifier)
4063  continue;
4064  OMPIteratorGeneratorScope IteratorScope(
4065  CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4066  for (const Expr *E : C->varlists()) {
4067  llvm::Value *Addr;
4068  llvm::Value *Size;
4069  std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4070  llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4071  LValue Base = CGF.MakeAddrLValue(
4072  CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4073  // affs[i].base_addr = &<Affinities[i].second>;
4074  LValue BaseAddrLVal = CGF.EmitLValueForField(
4075  Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4076  CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4077  BaseAddrLVal);
4078  // affs[i].len = sizeof(<Affinities[i].second>);
4079  LValue LenLVal = CGF.EmitLValueForField(
4080  Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4081  CGF.EmitStoreOfScalar(Size, LenLVal);
4082  Idx = CGF.Builder.CreateNUWAdd(
4083  Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4084  CGF.EmitStoreOfScalar(Idx, PosLVal);
4085  }
4086  }
4087  // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4088  // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4089  // naffins, kmp_task_affinity_info_t *affin_list);
4090  llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4091  llvm::Value *GTid = getThreadID(CGF, Loc);
4092  llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4093  AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4094  // FIXME: Emit the function and ignore its result for now unless the
4095  // runtime function is properly implemented.
4096  (void)CGF.EmitRuntimeCall(
4097  OMPBuilder.getOrCreateRuntimeFunction(
4098  CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4099  {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4100  }
4101  llvm::Value *NewTaskNewTaskTTy =
4103  NewTask, KmpTaskTWithPrivatesPtrTy);
4104  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4105  KmpTaskTWithPrivatesQTy);
4106  LValue TDBase =
4107  CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4108  // Fill the data in the resulting kmp_task_t record.
4109  // Copy shareds if there are any.
4110  Address KmpTaskSharedsPtr = Address::invalid();
4111  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4112  KmpTaskSharedsPtr = Address(
4113  CGF.EmitLoadOfScalar(
4114  CGF.EmitLValueForField(
4115  TDBase,
4116  *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4117  Loc),
4118  CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4119  LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4120  LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4121  CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4122  }
4123  // Emit initial values for private copies (if any).
4124  TaskResultTy Result;
4125  if (!Privates.empty()) {
4126  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4127  SharedsTy, SharedsPtrTy, Data, Privates,
4128  /*ForDup=*/false);
4130  (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4131  Result.TaskDupFn = emitTaskDupFunction(
4132  CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4133  KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4134  /*WithLastIter=*/!Data.LastprivateVars.empty());
4135  }
4136  }
4137  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4138  enum { Priority = 0, Destructors = 1 };
4139  // Provide pointer to function with destructors for privates.
4140  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4141  const RecordDecl *KmpCmplrdataUD =
4142  (*FI)->getType()->getAsUnionType()->getDecl();
4143  if (NeedsCleanup) {
4144  llvm::Value *DestructorFn = emitDestructorsFunction(
4145  CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4146  KmpTaskTWithPrivatesQTy);
4147  LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4148  LValue DestructorsLV = CGF.EmitLValueForField(
4149  Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4151  DestructorFn, KmpRoutineEntryPtrTy),
4152  DestructorsLV);
4153  }
4154  // Set priority.
4155  if (Data.Priority.getInt()) {
4156  LValue Data2LV = CGF.EmitLValueForField(
4157  TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4158  LValue PriorityLV = CGF.EmitLValueForField(
4159  Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4160  CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4161  }
4162  Result.NewTask = NewTask;
4163  Result.TaskEntry = TaskEntry;
4164  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4165  Result.TDBase = TDBase;
4166  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4167  return Result;
4168 }
4169 
4170 /// Translates internal dependency kind into the runtime kind.
4171 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4172  RTLDependenceKindTy DepKind;
4173  switch (K) {
4174  case OMPC_DEPEND_in:
4175  DepKind = RTLDependenceKindTy::DepIn;
4176  break;
4177  // Out and InOut dependencies must use the same code.
4178  case OMPC_DEPEND_out:
4179  case OMPC_DEPEND_inout:
4180  DepKind = RTLDependenceKindTy::DepInOut;
4181  break;
4182  case OMPC_DEPEND_mutexinoutset:
4183  DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4184  break;
4185  case OMPC_DEPEND_inoutset:
4186  DepKind = RTLDependenceKindTy::DepInOutSet;
4187  break;
4188  case OMPC_DEPEND_outallmemory:
4189  DepKind = RTLDependenceKindTy::DepOmpAllMem;
4190  break;
4191  case OMPC_DEPEND_source:
4192  case OMPC_DEPEND_sink:
4193  case OMPC_DEPEND_depobj:
4194  case OMPC_DEPEND_inoutallmemory:
4195  case OMPC_DEPEND_unknown:
4196  llvm_unreachable("Unknown task dependence type");
4197  }
4198  return DepKind;
4199 }
4200 
4201 /// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4202 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4203  QualType &FlagsTy) {
4204  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4205  if (KmpDependInfoTy.isNull()) {
4206  RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4207  KmpDependInfoRD->startDefinition();
4208  addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4209  addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4210  addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4211  KmpDependInfoRD->completeDefinition();
4212  KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4213  }
4214 }
4215 
// Reads the internals of a depobj variable: returns the number of dependency
// records it stores and an lvalue for the start of its kmp_depend_info array.
// The element count is stored in the base_addr field of the record placed one
// element *before* the array start (written by emitDepobjDependClause).
// NOTE(review): this extraction is missing the line carrying the function
// name/leading parameters and the start of the 'Base' load expression —
// confirm against the original source before editing.
4216 std::pair<llvm::Value *, LValue>
4218  SourceLocation Loc) {
4219  ASTContext &C = CGM.getContext();
4220  QualType FlagsTy;
4221  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4222  RecordDecl *KmpDependInfoRD =
4223  cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4224  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4227  DepobjLVal.getAddress(CGF),
4228  CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4229  KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one record: the count lives at deps[-1].
4230  Address DepObjAddr = CGF.Builder.CreateGEP(
4231  Base.getAddress(CGF),
4232  llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4233  LValue NumDepsBase = CGF.MakeAddrLValue(
4234  DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4235  // NumDeps = deps[i].base_addr;
4236  LValue BaseAddrLVal = CGF.EmitLValueForField(
4237  NumDepsBase,
4238  *std::next(KmpDependInfoRD->field_begin(),
4239  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4240  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4241  return std::make_pair(NumDeps, Base);
4242 }
4243 
// Emits one kmp_depend_info record (base_addr, len, flags) into
// DependenciesArray for every expression in Data.DepExprs. Pos is either a
// compile-time index (unsigned*), advanced in place, or a runtime counter
// lvalue (LValue*) that is loaded and incremented per element — the latter is
// required when the element count is only known at run time (iterators).
4244 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4245  llvm::PointerUnion<unsigned *, LValue *> Pos,
4246  const OMPTaskDataTy::DependData &Data,
4247  Address DependenciesArray) {
4248  CodeGenModule &CGM = CGF.CGM;
4249  ASTContext &C = CGM.getContext();
4250  QualType FlagsTy;
4251  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4252  RecordDecl *KmpDependInfoRD =
4253  cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4254  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4255 
  // Materialize iterator variables (if the clause has an iterator modifier)
  // for the duration of the loop below.
  // NOTE(review): the argument line selecting Data.IteratorExpr is truncated
  // in this extraction.
4256  OMPIteratorGeneratorScope IteratorScope(
4257  CGF, cast_or_null<OMPIteratorExpr>(
4259  : nullptr));
4260  for (const Expr *E : Data.DepExprs) {
4261  llvm::Value *Addr;
4262  llvm::Value *Size;
4263 
4264  // The expression will be a nullptr in the 'omp_all_memory' case.
4265  if (E) {
4266  std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4267  Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4268  } else {
  // omp_all_memory is encoded as a null address with zero length.
4269  Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4270  Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4271  }
4272  LValue Base;
4273  if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4274  Base = CGF.MakeAddrLValue(
4275  CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4276  } else {
4277  assert(E && "Expected a non-null expression");
4278  LValue &PosLVal = *Pos.get<LValue *>();
4279  llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4280  Base = CGF.MakeAddrLValue(
4281  CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4282  }
4283  // deps[i].base_addr = &<Dependencies[i].second>;
4284  LValue BaseAddrLVal = CGF.EmitLValueForField(
4285  Base,
4286  *std::next(KmpDependInfoRD->field_begin(),
4287  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4288  CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4289  // deps[i].len = sizeof(<Dependencies[i].second>);
4290  LValue LenLVal = CGF.EmitLValueForField(
4291  Base, *std::next(KmpDependInfoRD->field_begin(),
4292  static_cast<unsigned int>(RTLDependInfoFields::Len)));
4293  CGF.EmitStoreOfScalar(Size, LenLVal);
4294  // deps[i].flags = <Dependencies[i].first>;
4295  RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4296  LValue FlagsLVal = CGF.EmitLValueForField(
4297  Base,
4298  *std::next(KmpDependInfoRD->field_begin(),
4299  static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4300  CGF.EmitStoreOfScalar(
4301  llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4302  FlagsLVal);
  // Advance the output position: static index in place, or runtime counter
  // via load/add/store.
4303  if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4304  ++(*P);
4305  } else {
4306  LValue &PosLVal = *Pos.get<LValue *>();
4307  llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4308  Idx = CGF.Builder.CreateNUWAdd(Idx,
4309  llvm::ConstantInt::get(Idx->getType(), 1));
4310  CGF.EmitStoreOfScalar(Idx, PosLVal);
4311  }
4312  }
4313 }
4314 
// For each depobj expression in Data, loads how many kmp_depend_info records
// that depobj stores (via getDepobjElements) and returns those counts as a
// vector of runtime values. The loads of the per-depobj counters are done
// outside the iterator scope so the returned values dominate later uses.
// NOTE(review): the declaration line (return type/function name) and the
// declaration of the 'Sizes' result vector are missing from this extraction.
4316  CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4317  const OMPTaskDataTy::DependData &Data) {
4318  assert(Data.DepKind == OMPC_DEPEND_depobj &&
4319  "Expected depobj dependency kind.");
4321  SmallVector<LValue, 4> SizeLVals;
4322  ASTContext &C = CGF.getContext();
4323  {
4324  OMPIteratorGeneratorScope IteratorScope(
4325  CGF, cast_or_null<OMPIteratorExpr>(
4327  : nullptr));
4328  for (const Expr *E : Data.DepExprs) {
4329  llvm::Value *NumDeps;
4330  LValue Base;
4331  LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4332  std::tie(NumDeps, Base) =
4333  getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
  // Accumulate this depobj's count in a stack temporary.
4334  LValue NumLVal = CGF.MakeAddrLValue(
4335  CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4336  C.getUIntPtrType());
4337  CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4338  NumLVal.getAddress(CGF));
4339  llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4340  llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4341  CGF.EmitStoreOfScalar(Add, NumLVal);
4342  SizeLVals.push_back(NumLVal);
4343  }
4344  }
  // Re-load the accumulated counts after the iterator scope has ended.
4345  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4346  llvm::Value *Size =
4347  CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4348  Sizes.push_back(Size);
4349  }
4350  return Sizes;
4351 }
4352 
// Copies the raw kmp_depend_info records of every depobj expression in Data
// into DependenciesArray, starting at the runtime index held in PosLVal, and
// advances PosLVal by the number of records copied from each depobj.
// NOTE(review): the line with the function name/first parameter is missing
// from this extraction.
4354  QualType &KmpDependInfoTy,
4355  LValue PosLVal,
4356  const OMPTaskDataTy::DependData &Data,
4357  Address DependenciesArray) {
4358  assert(Data.DepKind == OMPC_DEPEND_depobj &&
4359  "Expected depobj dependency kind.");
  // Byte size of one kmp_depend_info record, used to scale the memcpy length.
4360  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4361  {
4362  OMPIteratorGeneratorScope IteratorScope(
4363  CGF, cast_or_null<OMPIteratorExpr>(
4365  : nullptr));
4366  for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4367  const Expr *E = Data.DepExprs[I];
4368  llvm::Value *NumDeps;
4369  LValue Base;
4370  LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4371  std::tie(NumDeps, Base) =
4372  getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4373 
4374  // memcopy dependency data.
4375  llvm::Value *Size = CGF.Builder.CreateNUWMul(
4376  ElSize,
4377  CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4378  llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4379  Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4380  CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4381 
4382  // Increase pos.
4383  // pos += size;
4384  llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4385  CGF.EmitStoreOfScalar(Add, PosLVal);
4386  }
4387  }
4388 }
4389 
// Lowers the depend clauses of a task into a single kmp_depend_info array.
// Returns {number of elements (i32), array address as void*}, or
// {nullptr, invalid} when every clause has an empty expression list.
// The array is filled in three passes: plain deps, deps with iterator
// modifiers (runtime counter), then copies of depobj contents.
// NOTE(review): several long lines (the CGF/Dependencies parameter line, the
// emitDepobjSizes call, and the ImplicitParamDecl argument line) are missing
// from this extraction.
4390 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4392  SourceLocation Loc) {
4393  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4394  return D.DepExprs.empty();
4395  }))
4396  return std::make_pair(nullptr, Address::invalid());
4397  // Process list of dependencies.
4398  ASTContext &C = CGM.getContext();
4399  Address DependenciesArray = Address::invalid();
4400  llvm::Value *NumOfElements = nullptr;
  // Statically known dependency count: depobj clauses and iterator clauses
  // contribute a runtime amount and are excluded here.
4401  unsigned NumDependencies = std::accumulate(
4402  Dependencies.begin(), Dependencies.end(), 0,
4403  [](unsigned V, const OMPTaskDataTy::DependData &D) {
4404  return D.DepKind == OMPC_DEPEND_depobj
4405  ? V
4406  : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4407  });
4408  QualType FlagsTy;
4409  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4410  bool HasDepobjDeps = false;
4411  bool HasRegularWithIterators = false;
4412  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4413  llvm::Value *NumOfRegularWithIterators =
4414  llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4415  // Calculate number of depobj dependencies and regular deps with the
4416  // iterators.
4417  for (const OMPTaskDataTy::DependData &D : Dependencies) {
4418  if (D.DepKind == OMPC_DEPEND_depobj) {
4421  for (llvm::Value *Size : Sizes) {
4422  NumOfDepobjElements =
4423  CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4424  }
4425  HasDepobjDeps = true;
4426  continue;
4427  }
4428  // Include number of iterations, if any.
4429 
4430  if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4431  for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4432  llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4433  Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4434  llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4435  Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4436  NumOfRegularWithIterators =
4437  CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4438  }
4439  HasRegularWithIterators = true;
4440  continue;
4441  }
4442  }
4443 
4444  QualType KmpDependInfoArrayTy;
  // Runtime-sized case: emit a VLA sized by the computed total; otherwise a
  // plain constant-sized stack array suffices.
4445  if (HasDepobjDeps || HasRegularWithIterators) {
4446  NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4447  /*isSigned=*/false);
4448  if (HasDepobjDeps) {
4449  NumOfElements =
4450  CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4451  }
4452  if (HasRegularWithIterators) {
4453  NumOfElements =
4454  CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4455  }
4456  auto *OVE = new (C) OpaqueValueExpr(
4457  Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4458  VK_PRValue);
4459  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4460  RValue::get(NumOfElements));
4461  KmpDependInfoArrayTy =
4462  C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4463  /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4464  // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4465  // Properly emit variable-sized array.
4466  auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4468  CGF.EmitVarDecl(*PD);
4469  DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4470  NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4471  /*isSigned=*/false);
4472  } else {
4473  KmpDependInfoArrayTy = C.getConstantArrayType(
4474  KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4475  ArrayType::Normal, /*IndexTypeQuals=*/0);
4476  DependenciesArray =
4477  CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4478  DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4479  NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4480  /*isSigned=*/false);
4481  }
  // Pass 1: regular dependencies without iterators, at static positions.
4482  unsigned Pos = 0;
4483  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4484  if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4485  Dependencies[I].IteratorExpr)
4486  continue;
4487  emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4488  DependenciesArray);
4489  }
4490  // Copy regular dependencies with iterators.
4491  LValue PosLVal = CGF.MakeAddrLValue(
4492  CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4493  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4494  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4495  if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4496  !Dependencies[I].IteratorExpr)
4497  continue;
4498  emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4499  DependenciesArray);
4500  }
4501  // Copy final depobj arrays without iterators.
4502  if (HasDepobjDeps) {
4503  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4504  if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4505  continue;
4506  emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4507  DependenciesArray);
4508  }
4509  }
4510  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4511  DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4512  return std::make_pair(NumOfElements, DependenciesArray);
4513 }
4514 
// Allocates and fills the dependency array backing an 'omp depobj' object.
// Storage comes from __kmpc_alloc (heap, default allocator); element 0 of the
// array records the element count (so depobj update/destroy can find it), and
// the returned address points just past that size record.
// NOTE(review): the declaration line (return type/function name) and the
// bitcast line after the __kmpc_alloc call are missing from this extraction.
4516  CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4517  SourceLocation Loc) {
4518  if (Dependencies.DepExprs.empty())
4519  return Address::invalid();
4520  // Process list of dependencies.
4521  ASTContext &C = CGM.getContext();
4522  Address DependenciesArray = Address::invalid();
4523  unsigned NumDependencies = Dependencies.DepExprs.size();
4524  QualType FlagsTy;
4525  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4526  RecordDecl *KmpDependInfoRD =
4527  cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4528 
4529  llvm::Value *Size;
4530  // Define type kmp_depend_info[<Dependencies.size()>];
4531  // For depobj reserve one extra element to store the number of elements.
4532  // It is required to handle depobj(x) update(in) construct.
4533  // kmp_depend_info[<Dependencies.size()>] deps;
4534  llvm::Value *NumDepsVal;
4535  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4536  if (const auto *IE =
4537  cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
  // Runtime count: product of the iterator trip counts.
4538  NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4539  for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4540  llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4541  Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4542  NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4543  }
4544  Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4545  NumDepsVal);
4546  CharUnits SizeInBytes =
4547  C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4548  llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4549  Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4550  NumDepsVal =
4551  CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4552  } else {
  // Static count: size of kmp_depend_info[NumDependencies + 1].
4553  QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4554  KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4555  nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4556  CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4557  Size = CGM.getSize(Sz.alignTo(Align));
4558  NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4559  }
4560  // Need to allocate on the dynamic memory.
4561  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4562  // Use default allocator.
4563  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4564  llvm::Value *Args[] = {ThreadID, Size, Allocator};
4565 
4566  llvm::Value *Addr =
4567  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4568  CGM.getModule(), OMPRTL___kmpc_alloc),
4569  Args, ".dep.arr.addr");
4570  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4572  Addr, KmpDependInfoLlvmTy->getPointerTo());
4573  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4574  // Write number of elements in the first element of array for depobj.
4575  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4576  // deps[i].base_addr = NumDependencies;
4577  LValue BaseAddrLVal = CGF.EmitLValueForField(
4578  Base,
4579  *std::next(KmpDependInfoRD->field_begin(),
4580  static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4581  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill from index 1 (index 0 holds the count); use a runtime counter only
  // when an iterator modifier makes the count dynamic.
4582  llvm::PointerUnion<unsigned *, LValue *> Pos;
4583  unsigned Idx = 1;
4584  LValue PosLVal;
4585  if (Dependencies.IteratorExpr) {
4586  PosLVal = CGF.MakeAddrLValue(
4587  CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4588  C.getSizeType());
4589  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4590  /*IsInit=*/true);
4591  Pos = &PosLVal;
4592  } else {
4593  Pos = &Idx;
4594  }
4595  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4596  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4597  CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4598  CGF.Int8Ty);
4599  return DependenciesArray;
4600 }
4601 
// Releases the heap storage of a depobj: rewinds the array pointer one
// element (to the size record at the true allocation start, see
// emitDepobjDependClause) and hands it to __kmpc_free with the default
// allocator.
// NOTE(review): the declaration line and parts of the 'Base'/'Addr' load
// expressions are missing from this extraction.
4603  SourceLocation Loc) {
4604  ASTContext &C = CGM.getContext();
4605  QualType FlagsTy;
4606  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4608  DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4609  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4611  Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4613  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4614  Addr.getElementType(), Addr.getPointer(),
4615  llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4616  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4617  CGF.VoidPtrTy);
4618  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4619  // Use default allocator.
4620  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4621  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4622 
4623  // _kmpc_free(gtid, addr, nullptr);
4624  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4625  CGM.getModule(), OMPRTL___kmpc_free),
4626  Args);
4627 }
4628 
// Implements 'omp depobj(x) update(kind)': walks every kmp_depend_info record
// stored in the depobj and rewrites its flags field to the runtime encoding of
// NewDepKind. Emitted as a do-while loop over [Begin, Begin + NumDeps) driven
// by a PHI over the element pointer.
// NOTE(review): the declaration line (function name/first parameter) is
// missing from this extraction.
4630  OpenMPDependClauseKind NewDepKind,
4631  SourceLocation Loc) {
4632  ASTContext &C = CGM.getContext();
4633  QualType FlagsTy;
4634  getDependTypes(C, KmpDependInfoTy, FlagsTy);
4635  RecordDecl *KmpDependInfoRD =
4636  cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4637  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4638  llvm::Value *NumDeps;
4639  LValue Base;
4640  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4641 
4642  Address Begin = Base.getAddress(CGF);
4643  // Cast from pointer to array type to pointer to single element.
4644  llvm::Value *End = CGF.Builder.CreateGEP(
4645  Begin.getElementType(), Begin.getPointer(), NumDeps);
4646  // The basic structure here is a while-do loop.
4647  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4648  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4649  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4650  CGF.EmitBlock(BodyBB);
4651  llvm::PHINode *ElementPHI =
4652  CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4653  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4654  Begin = Begin.withPointer(ElementPHI);
4655  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4656  Base.getTBAAInfo());
4657  // deps[i].flags = NewDepKind;
4658  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4659  LValue FlagsLVal = CGF.EmitLValueForField(
4660  Base, *std::next(KmpDependInfoRD->field_begin(),
4661  static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4662  CGF.EmitStoreOfScalar(
4663  llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4664  FlagsLVal);
4665 
4666  // Shift the address forward by one element.
4667  Address ElementNext =
4668  CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4669  ElementPHI->addIncoming(ElementNext.getPointer(),
4670  CGF.Builder.GetInsertBlock());
4671  llvm::Value *IsEmpty =
4672  CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4673  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4674  // Done.
4675  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4676 }
4677 
// Emits a '#pragma omp task' launch: allocates/initializes the task via
// emitTaskInit, lowers depend clauses, then either enqueues the task
// (__kmpc_omp_task[_with_deps]) or, under a false 'if' clause, executes it
// immediately inline between __kmpc_omp_task_begin_if0/_complete_if0 (waiting
// on dependencies first via __kmpc_omp_wait_deps).
// NOTE(review): the declaration line (function name and leading CGF/Loc
// parameters) is missing from this extraction.
4679  const OMPExecutableDirective &D,
4680  llvm::Function *TaskFunction,
4681  QualType SharedsTy, Address Shareds,
4682  const Expr *IfCond,
4683  const OMPTaskDataTy &Data) {
4684  if (!CGF.HaveInsertPoint())
4685  return;
4686 
4687  TaskResultTy Result =
4688  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4689  llvm::Value *NewTask = Result.NewTask;
4690  llvm::Function *TaskEntry = Result.TaskEntry;
4691  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4692  LValue TDBase = Result.TDBase;
4693  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4694  // Process list of dependences.
4695  Address DependenciesArray = Address::invalid();
4696  llvm::Value *NumOfElements;
4697  std::tie(NumOfElements, DependenciesArray) =
4698  emitDependClause(CGF, Data.Dependences, Loc);
4699 
4700  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4701  // libcall.
4702  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4703  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4704  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4705  // list is not empty
4706  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4707  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4708  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4709  llvm::Value *DepTaskArgs[7];
4710  if (!Data.Dependences.empty()) {
4711  DepTaskArgs[0] = UpLoc;
4712  DepTaskArgs[1] = ThreadID;
4713  DepTaskArgs[2] = NewTask;
4714  DepTaskArgs[3] = NumOfElements;
4715  DepTaskArgs[4] = DependenciesArray.getPointer();
4716  DepTaskArgs[5] = CGF.Builder.getInt32(0);
4717  DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4718  }
  // 'then' branch of the 'if' clause (or unconditional path): enqueue the task.
4719  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4720  &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4721  if (!Data.Tied) {
  // Untied tasks start at part id 0.
4722  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4723  LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4724  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4725  }
4726  if (!Data.Dependences.empty()) {
4727  CGF.EmitRuntimeCall(
4728  OMPBuilder.getOrCreateRuntimeFunction(
4729  CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4730  DepTaskArgs);
4731  } else {
4732  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4733  CGM.getModule(), OMPRTL___kmpc_omp_task),
4734  TaskArgs);
4735  }
4736  // Check if parent region is untied and build return for untied task;
4737  if (auto *Region =
4738  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4739  Region->emitUntiedSwitch(CGF);
4740  };
4741 
4742  llvm::Value *DepWaitTaskArgs[6];
4743  if (!Data.Dependences.empty()) {
4744  DepWaitTaskArgs[0] = UpLoc;
4745  DepWaitTaskArgs[1] = ThreadID;
4746  DepWaitTaskArgs[2] = NumOfElements;
4747  DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4748  DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4749  DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4750  }
4751  auto &M = CGM.getModule();
  // 'else' branch: 'if' evaluated to false — run the task body inline on the
  // encountering thread, bracketed by the if0 begin/complete libcalls.
4752  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4753  TaskEntry, &Data, &DepWaitTaskArgs,
4754  Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4755  CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4756  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4757  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4758  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4759  // is specified.
4760  if (!Data.Dependences.empty())
4761  CGF.EmitRuntimeCall(
4762  OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
4763  DepWaitTaskArgs);
4764  // Call proxy_task_entry(gtid, new_task);
4765  auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4766  Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4767  Action.Enter(CGF);
4768  llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4769  CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4770  OutlinedFnArgs);
4771  };
4772 
4773  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4774  // kmp_task_t *new_task);
4775  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4776  // kmp_task_t *new_task);
4777  RegionCodeGenTy RCG(CodeGen);
4778  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4779  M, OMPRTL___kmpc_omp_task_begin_if0),
4780  TaskArgs,
4781  OMPBuilder.getOrCreateRuntimeFunction(
4782  M, OMPRTL___kmpc_omp_task_complete_if0),
4783  TaskArgs);
4784  RCG.setAction(Action);
4785  RCG(CGF);
4786  };
4787 
4788  if (IfCond) {
4789  emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4790  } else {
4791  RegionCodeGenTy ThenRCG(ThenCodeGen);
4792  ThenRCG(CGF);
4793  }
4794 }
4795 
4797  const OMPLoopDirective &D,
4798  llvm::Function *TaskFunction,
4799  QualType SharedsTy, Address Shareds,
4800  const Expr *IfCond,
4801  const OMPTaskDataTy &Data) {
4802  if (!CGF.HaveInsertPoint())
4803  return;
4804  TaskResultTy Result =
4805  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4806  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4807  // libcall.
4808  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4809  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4810  // sched, kmp_uint64 grainsize, void *task_dup);
4811  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4812  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4813  llvm::Value *IfVal;
4814  if (IfCond) {
4815  IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4816  /*isSigned=*/true);
4817  } else {
4818  IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4819  }
4820 
4821  LValue LBLVal = CGF.EmitLValueForField(
4822  Result.TDBase,
4823  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4824  const auto *LBVar =
4825  cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4826  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4827  LBLVal.getQuals(),
4828  /*IsInitializer=*/true);
4829  LValue UBLVal = CGF.EmitLValueForField(
4830  Result.TDBase,
4831  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4832  const auto *UBVar =
4833  cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4834  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4835  UBLVal.getQuals(),
4836  /*IsInitializer=*/true);
4837  LValue StLVal = CGF.EmitLValueForField(
4838  Result.TDBase,
4839  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4840  const auto *StVar =
4841  cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4842  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4843  StLVal.getQuals(),
4844  /*IsInitializer=*/true);
4845  // Store reductions address.
4846  LValue RedLVal = CGF.EmitLValueForField(
4847  Result.TDBase,
4848  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4849  if (Data.Reductions) {
4850  CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4851  } else {
4852  CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4853  CGF.getContext().VoidPtrTy);
4854  }
4855  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4856  llvm::Value *TaskArgs[] = {
4857  UpLoc,
4858  ThreadID,
4859  Result.NewTask,
4860  IfVal,
4861  LBLVal.getPointer(CGF),
4862  UBLVal.getPointer(CGF),
4863  CGF.EmitLoadOfScalar(StLVal, Loc),
4864  llvm::ConstantInt::getSigned(
4865  CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4866  llvm::ConstantInt::getSigned(
4867  CGF.IntTy, Data.Schedule.getPointer()
4868  ? Data.Schedule.getInt() ? NumTasks : Grainsize
4869  : NoSchedule),
4870  Data.Schedule.getPointer()
4871  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4872