clang  6.0.0svn
CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/BitmaskEnum.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/CallSite.h"
25 #include "llvm/IR/DerivedTypes.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/Value.h"
28 #include "llvm/Support/Format.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cassert>
31 
32 using namespace clang;
33 using namespace CodeGen;
34 
35 namespace {
36 /// \brief Base class for handling code generation inside OpenMP regions.
37 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
38 public:
39  /// \brief Kinds of OpenMP regions used in codegen.
40  enum CGOpenMPRegionKind {
41  /// \brief Region with outlined function for standalone 'parallel'
42  /// directive.
43  ParallelOutlinedRegion,
44  /// \brief Region with outlined function for standalone 'task' directive.
45  TaskOutlinedRegion,
46  /// \brief Region for constructs that do not require function outlining,
47  /// like 'for', 'sections', 'atomic' etc. directives.
48  InlinedRegion,
49  /// \brief Region with outlined function for standalone 'target' directive.
50  TargetRegion,
51  };
52 
53  CGOpenMPRegionInfo(const CapturedStmt &CS,
54  const CGOpenMPRegionKind RegionKind,
55  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
56  bool HasCancel)
57  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
58  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
59 
60  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
61  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
62  bool HasCancel)
63  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
64  Kind(Kind), HasCancel(HasCancel) {}
65 
66  /// \brief Get a variable or parameter for storing global thread id
67  /// inside OpenMP construct.
68  virtual const VarDecl *getThreadIDVariable() const = 0;
69 
70  /// \brief Emit the captured statement body.
71  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
72 
73  /// \brief Get an LValue for the current ThreadID variable.
74  /// \return LValue for thread id variable. This LValue always has type int32*.
75  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
76 
77  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
78 
79  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
80 
81  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
82 
83  bool hasCancel() const { return HasCancel; }
84 
85  static bool classof(const CGCapturedStmtInfo *Info) {
86  return Info->getKind() == CR_OpenMP;
87  }
88 
89  ~CGOpenMPRegionInfo() override = default;
90 
91 protected:
92  CGOpenMPRegionKind RegionKind;
93  RegionCodeGenTy CodeGen;
94  OpenMPDirectiveKind Kind;
95  bool HasCancel;
96 };
97 
98 /// \brief API for captured statement code generation in OpenMP constructs.
99 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
100 public:
101  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
102  const RegionCodeGenTy &CodeGen,
103  OpenMPDirectiveKind Kind, bool HasCancel,
104  StringRef HelperName)
105  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
106  HasCancel),
107  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
108  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
109  }
110 
111  /// \brief Get a variable or parameter for storing global thread id
112  /// inside OpenMP construct.
113  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
114 
115  /// \brief Get the name of the capture helper.
116  StringRef getHelperName() const override { return HelperName; }
117 
118  static bool classof(const CGCapturedStmtInfo *Info) {
119  return CGOpenMPRegionInfo::classof(Info) &&
120  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
121  ParallelOutlinedRegion;
122  }
123 
124 private:
125  /// \brief A variable or parameter storing global thread id for OpenMP
126  /// constructs.
127  const VarDecl *ThreadIDVar;
128  StringRef HelperName;
129 };
130 
131 /// \brief API for captured statement code generation in OpenMP constructs.
132 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
133 public:
134  class UntiedTaskActionTy final : public PrePostActionTy {
135  bool Untied;
136  const VarDecl *PartIDVar;
137  const RegionCodeGenTy UntiedCodeGen;
138  llvm::SwitchInst *UntiedSwitch = nullptr;
139 
140  public:
141  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
142  const RegionCodeGenTy &UntiedCodeGen)
143  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
144  void Enter(CodeGenFunction &CGF) override {
145  if (Untied) {
146  // Emit task switching point.
147  auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
148  CGF.GetAddrOfLocalVar(PartIDVar),
149  PartIDVar->getType()->castAs<PointerType>());
150  auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
151  auto *DoneBB = CGF.createBasicBlock(".untied.done.");
152  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153  CGF.EmitBlock(DoneBB);
154  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157  CGF.Builder.GetInsertBlock());
158  emitUntiedSwitch(CGF);
159  }
160  }
161  void emitUntiedSwitch(CodeGenFunction &CGF) const {
162  if (Untied) {
163  auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
164  CGF.GetAddrOfLocalVar(PartIDVar),
165  PartIDVar->getType()->castAs<PointerType>());
166  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167  PartIdLVal);
168  UntiedCodeGen(CGF);
169  CodeGenFunction::JumpDest CurPoint =
170  CGF.getJumpDestInCurrentScope(".untied.next.");
171  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174  CGF.Builder.GetInsertBlock());
175  CGF.EmitBranchThroughCleanup(CurPoint);
176  CGF.EmitBlock(CurPoint.getBlock());
177  }
178  }
179  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180  };
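 // Illustrative sketch (an editor addition, not part of the original source):
 // for an untied task, the action above yields roughly this shape in the
 // outlined task entry, where part_id is the kmp_int32 that PartIDVar points to:
 //
 //   switch (*part_id) {              // built in Enter()
 //   case 0: ;                        // first ".untied.jmp." block
 //     ... task body up to the first switching point ...
 //     *part_id = 1; <UntiedCodeGen: re-enqueue the task>; return;
 //   case 1: ;                        // next ".untied.jmp." block
 //     ... task body continues ...
 //   }                                // falls through to ".untied.done."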
181  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182  const VarDecl *ThreadIDVar,
183  const RegionCodeGenTy &CodeGen,
184  OpenMPDirectiveKind Kind, bool HasCancel,
185  const UntiedTaskActionTy &Action)
186  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187  ThreadIDVar(ThreadIDVar), Action(Action) {
188  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189  }
190 
191  /// \brief Get a variable or parameter for storing global thread id
192  /// inside OpenMP construct.
193  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195  /// \brief Get an LValue for the current ThreadID variable.
196  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198  /// \brief Get the name of the capture helper.
199  StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201  void emitUntiedSwitch(CodeGenFunction &CGF) override {
202  Action.emitUntiedSwitch(CGF);
203  }
204 
205  static bool classof(const CGCapturedStmtInfo *Info) {
206  return CGOpenMPRegionInfo::classof(Info) &&
207  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208  TaskOutlinedRegion;
209  }
210 
211 private:
212  /// \brief A variable or parameter storing global thread id for OpenMP
213  /// constructs.
214  const VarDecl *ThreadIDVar;
215  /// Action for emitting code for untied tasks.
216  const UntiedTaskActionTy &Action;
217 };
218 
219 /// \brief API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224  const RegionCodeGenTy &CodeGen,
225  OpenMPDirectiveKind Kind, bool HasCancel)
226  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227  OldCSI(OldCSI),
228  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230  // \brief Retrieve the value of the context parameter.
231  llvm::Value *getContextValue() const override {
232  if (OuterRegionInfo)
233  return OuterRegionInfo->getContextValue();
234  llvm_unreachable("No context value for inlined OpenMP region");
235  }
236 
237  void setContextValue(llvm::Value *V) override {
238  if (OuterRegionInfo) {
239  OuterRegionInfo->setContextValue(V);
240  return;
241  }
242  llvm_unreachable("No context value for inlined OpenMP region");
243  }
244 
245  /// \brief Lookup the captured field decl for a variable.
246  const FieldDecl *lookup(const VarDecl *VD) const override {
247  if (OuterRegionInfo)
248  return OuterRegionInfo->lookup(VD);
249  // If there is no outer outlined region, there is no need to look up the
250  // variable in a list of captured variables; we can use the original one.
251  return nullptr;
252  }
253 
254  FieldDecl *getThisFieldDecl() const override {
255  if (OuterRegionInfo)
256  return OuterRegionInfo->getThisFieldDecl();
257  return nullptr;
258  }
259 
260  /// \brief Get a variable or parameter for storing global thread id
261  /// inside OpenMP construct.
262  const VarDecl *getThreadIDVariable() const override {
263  if (OuterRegionInfo)
264  return OuterRegionInfo->getThreadIDVariable();
265  return nullptr;
266  }
267 
268  /// \brief Get an LValue for the current ThreadID variable.
269  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270  if (OuterRegionInfo)
271  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272  llvm_unreachable("No LValue for inlined OpenMP construct");
273  }
274 
275  /// \brief Get the name of the capture helper.
276  StringRef getHelperName() const override {
277  if (auto *OuterRegionInfo = getOldCSI())
278  return OuterRegionInfo->getHelperName();
279  llvm_unreachable("No helper name for inlined OpenMP construct");
280  }
281 
282  void emitUntiedSwitch(CodeGenFunction &CGF) override {
283  if (OuterRegionInfo)
284  OuterRegionInfo->emitUntiedSwitch(CGF);
285  }
286 
287  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289  static bool classof(const CGCapturedStmtInfo *Info) {
290  return CGOpenMPRegionInfo::classof(Info) &&
291  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292  }
293 
294  ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297  /// \brief CodeGen info about outer OpenMP region.
298  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299  CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// \brief API for captured statement code generation in OpenMP target
303 /// constructs. For these captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application, so it is provided by the client, because only the client has
306 /// the information to generate it.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310  const RegionCodeGenTy &CodeGen, StringRef HelperName)
311  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312  /*HasCancel=*/false),
313  HelperName(HelperName) {}
314 
315  /// \brief This is unused for target regions because each starts executing
316  /// with a single thread.
317  const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319  /// \brief Get the name of the capture helper.
320  StringRef getHelperName() const override { return HelperName; }
321 
322  static bool classof(const CGCapturedStmtInfo *Info) {
323  return CGOpenMPRegionInfo::classof(Info) &&
324  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325  }
326 
327 private:
328  StringRef HelperName;
329 };
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332  llvm_unreachable("No codegen for expressions");
333 }
334 /// \brief API for generation of expressions captured in an innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340  OMPD_unknown,
341  /*HasCancel=*/false),
342  PrivScope(CGF) {
343  // Make sure the globals captured in the provided statement are local by
344  // using the privatization logic. We assume the same variable is not
345  // captured more than once.
346  for (auto &C : CS.captures()) {
347  if (!C.capturesVariable() && !C.capturesVariableByCopy())
348  continue;
349 
350  const VarDecl *VD = C.getCapturedVar();
351  if (VD->isLocalVarDeclOrParm())
352  continue;
353 
354  DeclRefExpr DRE(const_cast<VarDecl *>(VD),
355  /*RefersToEnclosingVariableOrCapture=*/false,
356  VD->getType().getNonReferenceType(), VK_LValue,
357  SourceLocation());
358  PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
359  return CGF.EmitLValue(&DRE).getAddress();
360  });
361  }
362  (void)PrivScope.Privatize();
363  }
364 
365  /// \brief Lookup the captured field decl for a variable.
366  const FieldDecl *lookup(const VarDecl *VD) const override {
367  if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
368  return FD;
369  return nullptr;
370  }
371 
372  /// \brief Emit the captured statement body.
373  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
374  llvm_unreachable("No body for expressions");
375  }
376 
377  /// \brief Get a variable or parameter for storing global thread id
378  /// inside OpenMP construct.
379  const VarDecl *getThreadIDVariable() const override {
380  llvm_unreachable("No thread id for expressions");
381  }
382 
383  /// \brief Get the name of the capture helper.
384  StringRef getHelperName() const override {
385  llvm_unreachable("No helper name for expressions");
386  }
387 
388  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
389 
390 private:
391  /// Private scope to capture global variables.
392  CodeGenFunction::OMPPrivateScope PrivScope;
393 };
394 
395 /// \brief RAII for emitting code of OpenMP constructs.
396 class InlinedOpenMPRegionRAII {
397  CodeGenFunction &CGF;
398  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
399  FieldDecl *LambdaThisCaptureField = nullptr;
400 
401 public:
402  /// \brief Constructs region for combined constructs.
403  /// \param CodeGen Code generation sequence for combined directives. Includes
404  /// a list of functions used for code generation of implicitly inlined
405  /// regions.
406  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407  OpenMPDirectiveKind Kind, bool HasCancel)
408  : CGF(CGF) {
409  // Start emission for the construct.
410  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414  CGF.LambdaThisCaptureField = nullptr;
415  }
416 
417  ~InlinedOpenMPRegionRAII() {
418  // Restore original CapturedStmtInfo only if we're done with code emission.
419  auto *OldCSI =
420  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
421  delete CGF.CapturedStmtInfo;
422  CGF.CapturedStmtInfo = OldCSI;
423  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
424  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
425  }
426 };
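// Illustrative usage sketch (an editor addition, not part of the original
// source): emitting an inlined construct under the RAII above.
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_for, /*HasCancel=*/false);
//     CodeGen(CGF);   // runs with the inlined CGOpenMPInlinedRegionInfo installed
//   }                 // destructor restores the previous CapturedStmtInfo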
427 
428 /// \brief Values for bit flags used in the ident_t to describe the fields.
429 /// All enum elements are named and described in accordance with the code
430 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
431 enum OpenMPLocationFlags : unsigned {
432  /// \brief Use trampoline for internal microtask.
433  OMP_IDENT_IMD = 0x01,
434  /// \brief Use c-style ident structure.
435  OMP_IDENT_KMPC = 0x02,
436  /// \brief Atomic reduction option for kmpc_reduce.
437  OMP_ATOMIC_REDUCE = 0x10,
438  /// \brief Explicit 'barrier' directive.
439  OMP_IDENT_BARRIER_EXPL = 0x20,
440  /// \brief Implicit barrier in code.
441  OMP_IDENT_BARRIER_IMPL = 0x40,
442  /// \brief Implicit barrier in 'for' directive.
443  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
444  /// \brief Implicit barrier in 'sections' directive.
445  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
446  /// \brief Implicit barrier in 'single' directive.
447  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
448  /// Call of __kmp_for_static_init for static loop.
449  OMP_IDENT_WORK_LOOP = 0x200,
450  /// Call of __kmp_for_static_init for sections.
451  OMP_IDENT_WORK_SECTIONS = 0x400,
452  /// Call of __kmp_for_static_init for distribute.
453  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
454  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
455 };
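// Illustrative note (an editor addition, not part of the original source):
// these flags are OR-ed together when an ident_t is emitted. For example,
// emitUpdateLocation() below always adds OMP_IDENT_KMPC, so an implicit
// barrier location would carry something like
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL.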
456 
457 /// \brief Describes ident structure that describes a source location.
458 /// All descriptions are taken from
459 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
460 /// Original structure:
461 /// typedef struct ident {
462 /// kmp_int32 reserved_1; /**< might be used in Fortran;
463 /// see above */
464 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
465 /// KMP_IDENT_KMPC identifies this union
466 /// member */
467 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
468 /// see above */
469 ///#if USE_ITT_BUILD
470 /// /* but currently used for storing
471 /// region-specific ITT */
472 /// /* contextual information. */
473 ///#endif /* USE_ITT_BUILD */
474 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
475 /// C++ */
476 /// char const *psource; /**< String describing the source location.
477 /// The string is composed of semi-colon separated
478 /// fields which describe the source file,
479 /// the function and a pair of line numbers that
480 /// delimit the construct.
481 /// */
482 /// } ident_t;
483 enum IdentFieldIndex {
484  /// \brief might be used in Fortran
485  IdentField_Reserved_1,
486  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
487  IdentField_Flags,
488  /// \brief Not really used in Fortran any more
489  IdentField_Reserved_2,
490  /// \brief Source[4] in Fortran, do not use for C++
491  IdentField_Reserved_3,
492  /// \brief String describing the source location. The string is composed of
493  /// semi-colon separated fields which describe the source file, the function
494  /// and a pair of line numbers that delimit the construct.
495  IdentField_PSource
496 };
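// Illustrative note (an editor addition, not part of the original source):
// the first four ident_t fields are i32, so IdentField_Flags sits at byte
// offset 4 and IdentField_PSource at byte offset 16 (compare
// getOffsetOfIdentField and getIdentSize below); psource points to a string
// of the form ";file;function;line;column;;".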
497 
498 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
499 /// the enum sched_type in kmp.h).
500 enum OpenMPSchedType {
501  /// \brief Lower bound for default (unordered) versions.
502  OMP_sch_lower = 32,
503  OMP_sch_static_chunked = 33,
504  OMP_sch_static = 34,
505  OMP_sch_dynamic_chunked = 35,
506  OMP_sch_guided_chunked = 36,
507  OMP_sch_runtime = 37,
508  OMP_sch_auto = 38,
509  /// static with chunk adjustment (e.g., simd)
510  OMP_sch_static_balanced_chunked = 45,
511  /// \brief Lower bound for 'ordered' versions.
512  OMP_ord_lower = 64,
513  OMP_ord_static_chunked = 65,
514  OMP_ord_static = 66,
515  OMP_ord_dynamic_chunked = 67,
516  OMP_ord_guided_chunked = 68,
517  OMP_ord_runtime = 69,
518  OMP_ord_auto = 70,
519  OMP_sch_default = OMP_sch_static,
520  /// \brief dist_schedule types
521  OMP_dist_sch_static_chunked = 91,
522  OMP_dist_sch_static = 92,
523  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
524  /// Set if the monotonic schedule modifier was present.
525  OMP_sch_modifier_monotonic = (1 << 29),
526  /// Set if the nonmonotonic schedule modifier was present.
527  OMP_sch_modifier_nonmonotonic = (1 << 30),
528 };
529 
530 enum OpenMPRTLFunction {
531  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
532  /// kmpc_micro microtask, ...);
533  OMPRTL__kmpc_fork_call,
534  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
535  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
536  OMPRTL__kmpc_threadprivate_cached,
537  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
538  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
539  OMPRTL__kmpc_threadprivate_register,
540  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
541  OMPRTL__kmpc_global_thread_num,
542  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
543  // kmp_critical_name *crit);
544  OMPRTL__kmpc_critical,
545  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
546  // global_tid, kmp_critical_name *crit, uintptr_t hint);
547  OMPRTL__kmpc_critical_with_hint,
548  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
549  // kmp_critical_name *crit);
550  OMPRTL__kmpc_end_critical,
551  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
552  // global_tid);
553  OMPRTL__kmpc_cancel_barrier,
554  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
555  OMPRTL__kmpc_barrier,
556  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
557  OMPRTL__kmpc_for_static_fini,
558  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
559  // global_tid);
560  OMPRTL__kmpc_serialized_parallel,
561  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
562  // global_tid);
563  OMPRTL__kmpc_end_serialized_parallel,
564  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
565  // kmp_int32 num_threads);
566  OMPRTL__kmpc_push_num_threads,
567  // Call to void __kmpc_flush(ident_t *loc);
568  OMPRTL__kmpc_flush,
569  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
570  OMPRTL__kmpc_master,
571  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
572  OMPRTL__kmpc_end_master,
573  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
574  // int end_part);
575  OMPRTL__kmpc_omp_taskyield,
576  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
577  OMPRTL__kmpc_single,
578  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
579  OMPRTL__kmpc_end_single,
580  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
581  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
582  // kmp_routine_entry_t *task_entry);
583  OMPRTL__kmpc_omp_task_alloc,
584  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
585  // new_task);
586  OMPRTL__kmpc_omp_task,
587  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
588  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
589  // kmp_int32 didit);
590  OMPRTL__kmpc_copyprivate,
591  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
592  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
593  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
594  OMPRTL__kmpc_reduce,
595  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
596  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
597  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
598  // *lck);
599  OMPRTL__kmpc_reduce_nowait,
600  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
601  // kmp_critical_name *lck);
602  OMPRTL__kmpc_end_reduce,
603  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
604  // kmp_critical_name *lck);
605  OMPRTL__kmpc_end_reduce_nowait,
606  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
607  // kmp_task_t * new_task);
608  OMPRTL__kmpc_omp_task_begin_if0,
609  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
610  // kmp_task_t * new_task);
611  OMPRTL__kmpc_omp_task_complete_if0,
612  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
613  OMPRTL__kmpc_ordered,
614  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
615  OMPRTL__kmpc_end_ordered,
616  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
617  // global_tid);
618  OMPRTL__kmpc_omp_taskwait,
619  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
620  OMPRTL__kmpc_taskgroup,
621  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
622  OMPRTL__kmpc_end_taskgroup,
623  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
624  // int proc_bind);
625  OMPRTL__kmpc_push_proc_bind,
626  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
627  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
628  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
629  OMPRTL__kmpc_omp_task_with_deps,
630  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
631  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
632  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
633  OMPRTL__kmpc_omp_wait_deps,
634  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
635  // global_tid, kmp_int32 cncl_kind);
636  OMPRTL__kmpc_cancellationpoint,
637  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
638  // kmp_int32 cncl_kind);
639  OMPRTL__kmpc_cancel,
640  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
641  // kmp_int32 num_teams, kmp_int32 thread_limit);
642  OMPRTL__kmpc_push_num_teams,
643  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
644  // microtask, ...);
645  OMPRTL__kmpc_fork_teams,
646  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
647  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
648  // sched, kmp_uint64 grainsize, void *task_dup);
649  OMPRTL__kmpc_taskloop,
650  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
651  // num_dims, struct kmp_dim *dims);
652  OMPRTL__kmpc_doacross_init,
653  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
654  OMPRTL__kmpc_doacross_fini,
655  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
656  // *vec);
657  OMPRTL__kmpc_doacross_post,
658  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
659  // *vec);
660  OMPRTL__kmpc_doacross_wait,
661  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
662  // *data);
663  OMPRTL__kmpc_task_reduction_init,
664  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
665  // *d);
666  OMPRTL__kmpc_task_reduction_get_th_data,
667 
668  //
669  // Offloading related calls
670  //
671  // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
672  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
673  // *arg_types);
674  OMPRTL__tgt_target,
675  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
676  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
677  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
678  OMPRTL__tgt_target_teams,
679  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
680  OMPRTL__tgt_register_lib,
681  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
682  OMPRTL__tgt_unregister_lib,
683  // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
684  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
685  OMPRTL__tgt_target_data_begin,
686  // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
687  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
688  OMPRTL__tgt_target_data_end,
689  // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
690  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
691  OMPRTL__tgt_target_data_update,
692 };
693 
694 /// A basic class for pre- and post-actions used in the advanced codegen
695 /// sequence for an OpenMP region.
696 class CleanupTy final : public EHScopeStack::Cleanup {
697  PrePostActionTy *Action;
698 
699 public:
700  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
701  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
702  if (!CGF.HaveInsertPoint())
703  return;
704  Action->Exit(CGF);
705  }
706 };
707 
708 } // anonymous namespace
709 
710 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
711  CodeGenFunction::RunCleanupsScope Scope(CGF);
712  if (PrePostAction) {
713  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
714  Callback(CodeGen, CGF, *PrePostAction);
715  } else {
716  PrePostActionTy Action;
717  Callback(CodeGen, CGF, Action);
718  }
719 }
720 
721 /// Check whether the combiner is a call to a UDR combiner and, if so, return
722 /// the UDR decl used for the reduction.
723 static const OMPDeclareReductionDecl *
724 getReductionInit(const Expr *ReductionOp) {
725  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
726  if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
727  if (auto *DRE =
728  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
729  if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
730  return DRD;
731  return nullptr;
732 }
733 
734 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
735  const OMPDeclareReductionDecl *DRD,
736  const Expr *InitOp,
737  Address Private, Address Original,
738  QualType Ty) {
739  if (DRD->getInitializer()) {
740  std::pair<llvm::Function *, llvm::Function *> Reduction =
741  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
742  auto *CE = cast<CallExpr>(InitOp);
743  auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
744  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
745  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
746  auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
747  auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
748  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
749  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
750  [=]() -> Address { return Private; });
751  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
752  [=]() -> Address { return Original; });
753  (void)PrivateScope.Privatize();
754  RValue Func = RValue::get(Reduction.second);
755  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
756  CGF.EmitIgnoredExpr(InitOp);
757  } else {
758  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
759  auto *GV = new llvm::GlobalVariable(
760  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
761  llvm::GlobalValue::PrivateLinkage, Init, ".init");
762  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
763  RValue InitRVal;
764  switch (CGF.getEvaluationKind(Ty)) {
765  case TEK_Scalar:
766  InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
767  break;
768  case TEK_Complex:
769  InitRVal =
770  RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
771  break;
772  case TEK_Aggregate:
773  InitRVal = RValue::getAggregate(LV.getAddress());
774  break;
775  }
776  OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
777  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
778  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
779  /*IsInitializer=*/false);
780  }
781 }
782 
783 /// \brief Emit initialization of arrays of complex types.
784 /// \param DestAddr Address of the array.
785 /// \param Type Type of array.
786 /// \param Init Initial expression of array.
787 /// \param SrcAddr Address of the original array.
788 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
789  QualType Type, bool EmitDeclareReductionInit,
790  const Expr *Init,
791  const OMPDeclareReductionDecl *DRD,
792  Address SrcAddr = Address::invalid()) {
793  // Perform element-by-element initialization.
794  QualType ElementTy;
795 
796  // Drill down to the base element type on both arrays.
797  auto ArrayTy = Type->getAsArrayTypeUnsafe();
798  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
799  DestAddr =
800  CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
801  if (DRD)
802  SrcAddr =
803  CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
804 
805  llvm::Value *SrcBegin = nullptr;
806  if (DRD)
807  SrcBegin = SrcAddr.getPointer();
808  auto DestBegin = DestAddr.getPointer();
809  // Cast from pointer to array type to pointer to single element.
810  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
811  // The basic structure here is a while-do loop.
812  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
813  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
814  auto IsEmpty =
815  CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
816  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
817 
818  // Enter the loop body, making that address the current address.
819  auto EntryBB = CGF.Builder.GetInsertBlock();
820  CGF.EmitBlock(BodyBB);
821 
822  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
823 
824  llvm::PHINode *SrcElementPHI = nullptr;
825  Address SrcElementCurrent = Address::invalid();
826  if (DRD) {
827  SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
828  "omp.arraycpy.srcElementPast");
829  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
830  SrcElementCurrent =
831  Address(SrcElementPHI,
832  SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
833  }
834  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
835  DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
836  DestElementPHI->addIncoming(DestBegin, EntryBB);
837  Address DestElementCurrent =
838  Address(DestElementPHI,
839  DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
840 
841  // Emit copy.
842  {
843  CodeGenFunction::RunCleanupsScope InitScope(CGF);
844  if (EmitDeclareReductionInit) {
845  emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
846  SrcElementCurrent, ElementTy);
847  } else
848  CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
849  /*IsInitializer=*/false);
850  }
851 
852  if (DRD) {
853  // Shift the address forward by one element.
854  auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
855  SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
856  SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
857  }
858 
859  // Shift the address forward by one element.
860  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
861  DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
862  // Check whether we've reached the end.
863  auto Done =
864  CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
865  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
866  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
867 
868  // Done.
869  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
870 }
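// Illustrative sketch (an editor addition, not part of the original source):
// the control flow emitted by EmitOMPAggregateInit above, in pseudo-IR form.
//
//   entry:               isempty = (dest.begin == dest.end)
//                        br isempty ? done : body
//   omp.arrayinit.body:  dest.cur = phi [dest.begin, entry], [dest.next, body]
//                        <emit Init (or the UDR initializer) into dest.cur>
//                        dest.next = dest.cur + 1
//                        br (dest.next == dest.end) ? done : body
//   omp.arrayinit.done: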
871 
872 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
873  return CGF.EmitOMPSharedLValue(E);
874 }
875 
876 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
877  const Expr *E) {
878  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
879  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
880  return LValue();
881 }
882 
883 void ReductionCodeGen::emitAggregateInitialization(
884  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
885  const OMPDeclareReductionDecl *DRD) {
886  // Emit VarDecl with copy init for arrays.
887  // Get the address of the original variable captured in current
888  // captured region.
889  auto *PrivateVD =
890  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
891  bool EmitDeclareReductionInit =
892  DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
893  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
894  EmitDeclareReductionInit,
895  EmitDeclareReductionInit ? ClausesData[N].ReductionOp
896  : PrivateVD->getInit(),
897  DRD, SharedLVal.getAddress());
898 }
899 
900 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
901  ArrayRef<const Expr *> Privates,
902  ArrayRef<const Expr *> ReductionOps) {
903  ClausesData.reserve(Shareds.size());
904  SharedAddresses.reserve(Shareds.size());
905  Sizes.reserve(Shareds.size());
906  BaseDecls.reserve(Shareds.size());
907  auto IPriv = Privates.begin();
908  auto IRed = ReductionOps.begin();
909  for (const auto *Ref : Shareds) {
910  ClausesData.emplace_back(Ref, *IPriv, *IRed);
911  std::advance(IPriv, 1);
912  std::advance(IRed, 1);
913  }
914 }
915 
916 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
917  assert(SharedAddresses.size() == N &&
918  "Number of generated lvalues must be exactly N.");
919  SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref),
920  emitSharedLValueUB(CGF, ClausesData[N].Ref));
921 }
922 
923 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
924  auto *PrivateVD =
925  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
926  QualType PrivateType = PrivateVD->getType();
927  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
928  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
929  Sizes.emplace_back(
930  CGF.getTypeSize(
931  SharedAddresses[N].first.getType().getNonReferenceType()),
932  nullptr);
933  return;
934  }
935  llvm::Value *Size;
936  llvm::Value *SizeInChars;
937  llvm::Type *ElemType =
938  cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
939  ->getElementType();
940  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
941  if (AsArraySection) {
942  Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
943  SharedAddresses[N].first.getPointer());
944  Size = CGF.Builder.CreateNUWAdd(
945  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
946  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
947  } else {
948  SizeInChars = CGF.getTypeSize(
949  SharedAddresses[N].first.getType().getNonReferenceType());
950  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
951  }
952  Sizes.emplace_back(SizeInChars, Size);
953  CodeGenFunction::OpaqueValueMapping OpaqueMap(
954  CGF,
955  cast<OpaqueValueExpr>(
956  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
957  RValue::get(Size));
958  CGF.EmitVariablyModifiedType(PrivateType);
959 }
960 
961 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
962  llvm::Value *Size) {
963  auto *PrivateVD =
964  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
965  QualType PrivateType = PrivateVD->getType();
966  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
967  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
968  assert(!Size && !Sizes[N].second &&
969  "Size should be nullptr for non-variably modified reduction "
970  "items.");
971  return;
972  }
973  CodeGenFunction::OpaqueValueMapping OpaqueMap(
974  CGF,
975  cast<OpaqueValueExpr>(
976  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
977  RValue::get(Size));
978  CGF.EmitVariablyModifiedType(PrivateType);
979 }
980 
981 void ReductionCodeGen::emitInitialization(
982  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
983  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
984  assert(SharedAddresses.size() > N && "No variable was generated");
985  auto *PrivateVD =
986  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
987  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
988  QualType PrivateType = PrivateVD->getType();
989  PrivateAddr = CGF.Builder.CreateElementBitCast(
990  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
991  QualType SharedType = SharedAddresses[N].first.getType();
992  SharedLVal = CGF.MakeAddrLValue(
993  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
994  CGF.ConvertTypeForMem(SharedType)),
995  SharedType, SharedAddresses[N].first.getBaseInfo(),
996  CGF.CGM.getTBAAAccessInfo(SharedType));
997  if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
998  CGF.getContext().getAsArrayType(PrivateVD->getType())) {
999  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1000  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1001  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1002  PrivateAddr, SharedLVal.getAddress(),
1003  SharedLVal.getType());
1004  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1005  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1006  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1007  PrivateVD->getType().getQualifiers(),
1008  /*IsInitializer=*/false);
1009  }
1010 }
1011 
1012 bool ReductionCodeGen::needCleanups(unsigned N) {
1013  auto *PrivateVD =
1014  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1015  QualType PrivateType = PrivateVD->getType();
1016  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1017  return DTorKind != QualType::DK_none;
1018 }
1019 
1020 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1021  Address PrivateAddr) {
1022  auto *PrivateVD =
1023  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1024  QualType PrivateType = PrivateVD->getType();
1025  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1026  if (needCleanups(N)) {
1027  PrivateAddr = CGF.Builder.CreateElementBitCast(
1028  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1029  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1030  }
1031 }
1032 
1033 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1034  LValue BaseLV) {
1035  BaseTy = BaseTy.getNonReferenceType();
1036  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1037  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1038  if (auto *PtrTy = BaseTy->getAs<PointerType>())
1039  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1040  else {
1041  BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
1042  BaseTy->castAs<ReferenceType>());
1043  }
1044  BaseTy = BaseTy->getPointeeType();
1045  }
1046  return CGF.MakeAddrLValue(
1047  CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1048  CGF.ConvertTypeForMem(ElTy)),
1049  BaseLV.getType(), BaseLV.getBaseInfo(),
1050  CGF.CGM.getTBAAAccessInfo(BaseLV.getType()));
1051 }
1052 
1053 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1054  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1055  llvm::Value *Addr) {
1056  Address Tmp = Address::invalid();
1057  Address TopTmp = Address::invalid();
1058  Address MostTopTmp = Address::invalid();
1059  BaseTy = BaseTy.getNonReferenceType();
1060  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1061  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1062  Tmp = CGF.CreateMemTemp(BaseTy);
1063  if (TopTmp.isValid())
1064  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1065  else
1066  MostTopTmp = Tmp;
1067  TopTmp = Tmp;
1068  BaseTy = BaseTy->getPointeeType();
1069  }
1070  llvm::Type *Ty = BaseLVType;
1071  if (Tmp.isValid())
1072  Ty = Tmp.getElementType();
1073  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1074  if (Tmp.isValid()) {
1075  CGF.Builder.CreateStore(Addr, Tmp);
1076  return MostTopTmp;
1077  }
1078  return Address(Addr, BaseLVAlignment);
1079 }
1080 
1081 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1082  Address PrivateAddr) {
1083  const DeclRefExpr *DE;
1084  const VarDecl *OrigVD = nullptr;
1085  if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
1086  auto *Base = OASE->getBase()->IgnoreParenImpCasts();
1087  while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1088  Base = TempOASE->getBase()->IgnoreParenImpCasts();
1089  while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1090  Base = TempASE->getBase()->IgnoreParenImpCasts();
1091  DE = cast<DeclRefExpr>(Base);
1092  OrigVD = cast<VarDecl>(DE->getDecl());
1093  } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
1094  auto *Base = ASE->getBase()->IgnoreParenImpCasts();
1095  while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1096  Base = TempASE->getBase()->IgnoreParenImpCasts();
1097  DE = cast<DeclRefExpr>(Base);
1098  OrigVD = cast<VarDecl>(DE->getDecl());
1099  }
1100  if (OrigVD) {
1101  BaseDecls.emplace_back(OrigVD);
1102  auto OriginalBaseLValue = CGF.EmitLValue(DE);
1103  LValue BaseLValue =
1104  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1105  OriginalBaseLValue);
1106  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1107  BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1108  llvm::Value *Ptr =
1109  CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment);
1110  return castToBase(CGF, OrigVD->getType(),
1111  SharedAddresses[N].first.getType(),
1112  OriginalBaseLValue.getPointer()->getType(),
1113  OriginalBaseLValue.getAlignment(), Ptr);
1114  }
1115  BaseDecls.emplace_back(
1116  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1117  return PrivateAddr;
1118 }
1119 
1120 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1121  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
1122  return DRD && DRD->getInitializer();
1123 }
1124 
1125 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1126  return CGF.EmitLoadOfPointerLValue(
1127  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1128  getThreadIDVariable()->getType()->castAs<PointerType>());
1129 }
1130 
1131 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1132  if (!CGF.HaveInsertPoint())
1133  return;
1134  // 1.2.2 OpenMP Language Terminology
1135  // Structured block - An executable statement with a single entry at the
1136  // top and a single exit at the bottom.
1137  // The point of exit cannot be a branch out of the structured block.
1138  // longjmp() and throw() must not violate the entry/exit criteria.
1139  CGF.EHStack.pushTerminate();
1140  CodeGen(CGF);
1141  CGF.EHStack.popTerminate();
1142 }
1143 
1144 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1145  CodeGenFunction &CGF) {
1146  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1147  getThreadIDVariable()->getType(),
1148  AlignmentSource::Decl);
1149 }
1150 
1151 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
1152  : CGM(CGM), OffloadEntriesInfoManager(CGM) {
1153  IdentTy = llvm::StructType::create(
1154  "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
1155  CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
1156  CGM.Int8PtrTy /* psource */);
1157  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1158 
1159  loadOffloadInfoMetadata();
1160 }
1161 
1162 void CGOpenMPRuntime::clear() {
1163  InternalVars.clear();
1164 }
1165 
1166 static llvm::Function *
1167 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1168  const Expr *CombinerInitializer, const VarDecl *In,
1169  const VarDecl *Out, bool IsCombiner) {
1170  // void .omp_combiner.(Ty *in, Ty *out);
1171  auto &C = CGM.getContext();
1172  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1173  FunctionArgList Args;
1174  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1175  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1176  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1177  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1178  Args.push_back(&OmpOutParm);
1179  Args.push_back(&OmpInParm);
1180  auto &FnInfo =
1181  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1182  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1183  auto *Fn = llvm::Function::Create(
1184  FnTy, llvm::GlobalValue::InternalLinkage,
1185  IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
1186  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
1187  Fn->removeFnAttr(llvm::Attribute::NoInline);
1188  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1189  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1190  CodeGenFunction CGF(CGM);
1191  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1192  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1193  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
1194  CodeGenFunction::OMPPrivateScope Scope(CGF);
1195  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1196  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
1197  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1198  .getAddress();
1199  });
1200  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1201  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
1202  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1203  .getAddress();
1204  });
1205  (void)Scope.Privatize();
1206  if (!IsCombiner && Out->hasInit() &&
1207  !CGF.isTrivialInitializer(Out->getInit())) {
1208  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1209  Out->getType().getQualifiers(),
1210  /*IsInitializer=*/true);
1211  }
1212  if (CombinerInitializer)
1213  CGF.EmitIgnoredExpr(CombinerInitializer);
1214  Scope.ForceCleanup();
1215  CGF.FinishFunction();
1216  return Fn;
1217 }
1218 
1219 void CGOpenMPRuntime::emitUserDefinedReduction(
1220  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1221  if (UDRMap.count(D) > 0)
1222  return;
1223  auto &C = CGM.getContext();
1224  if (!In || !Out) {
1225  In = &C.Idents.get("omp_in");
1226  Out = &C.Idents.get("omp_out");
1227  }
1228  llvm::Function *Combiner = emitCombinerOrInitializer(
1229  CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
1230  cast<VarDecl>(D->lookup(Out).front()),
1231  /*IsCombiner=*/true);
1232  llvm::Function *Initializer = nullptr;
1233  if (auto *Init = D->getInitializer()) {
1234  if (!Priv || !Orig) {
1235  Priv = &C.Idents.get("omp_priv");
1236  Orig = &C.Idents.get("omp_orig");
1237  }
1238  Initializer = emitCombinerOrInitializer(
1239  CGM, D->getType(),
1240  D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1241  : nullptr,
1242  cast<VarDecl>(D->lookup(Orig).front()),
1243  cast<VarDecl>(D->lookup(Priv).front()),
1244  /*IsCombiner=*/false);
1245  }
1246  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
1247  if (CGF) {
1248  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1249  Decls.second.push_back(D);
1250  }
1251 }
1252 
1253 std::pair<llvm::Function *, llvm::Function *>
1254 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1255  auto I = UDRMap.find(D);
1256  if (I != UDRMap.end())
1257  return I->second;
1258  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1259  return UDRMap.lookup(D);
1260 }
1261 
1262 // Layout information for ident_t.
1263 static CharUnits getIdentAlign(CodeGenModule &CGM) {
1264  return CGM.getPointerAlign();
1265 }
1266 static CharUnits getIdentSize(CodeGenModule &CGM) {
1267  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
1268  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
1269 }
1270 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
1271  // All the fields except the last are i32, so this works beautifully.
1272  return unsigned(Field) * CharUnits::fromQuantity(4);
1273 }
1274 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
1275  IdentFieldIndex Field,
1276  const llvm::Twine &Name = "") {
1277  auto Offset = getOffsetOfIdentField(Field);
1278  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
1279 }
1280 
1281 static llvm::Value *emitParallelOrTeamsOutlinedFunction(
1282  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1283  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1284  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1285  assert(ThreadIDVar->getType()->isPointerType() &&
1286  "thread id variable must be of type kmp_int32 *");
1287  CodeGenFunction CGF(CGM, true);
1288  bool HasCancel = false;
1289  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1290  HasCancel = OPD->hasCancel();
1291  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1292  HasCancel = OPSD->hasCancel();
1293  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1294  HasCancel = OPFD->hasCancel();
1295  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1296  HasCancel, OutlinedHelperName);
1297  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1298  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1299 }
1300 
1301 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
1302  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1303  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1304  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1305  return emitParallelOrTeamsOutlinedFunction(
1306  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1307 }
1308 
1309 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1310  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1311  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1312  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1313  return emitParallelOrTeamsOutlinedFunction(
1314  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1315 }
1316 
1317 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
1318  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1319  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1320  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1321  bool Tied, unsigned &NumberOfParts) {
1322  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1323  PrePostActionTy &) {
1324  auto *ThreadID = getThreadID(CGF, D.getLocStart());
1325  auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
1326  llvm::Value *TaskArgs[] = {
1327  UpLoc, ThreadID,
1328  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1329  TaskTVar->getType()->castAs<PointerType>())
1330  .getPointer()};
1331  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1332  };
1333  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1334  UntiedCodeGen);
1335  CodeGen.setAction(Action);
1336  assert(!ThreadIDVar->getType()->isPointerType() &&
1337  "thread id variable must be of type kmp_int32 for tasks");
1338  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
1339  auto *TD = dyn_cast<OMPTaskDirective>(&D);
1340  CodeGenFunction CGF(CGM, true);
1341  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1342  InnermostKind,
1343  TD ? TD->hasCancel() : false, Action);
1344  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1345  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
1346  if (!Tied)
1347  NumberOfParts = Action.getNumberOfParts();
1348  return Res;
1349 }
1350 
1351 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1352  CharUnits Align = getIdentAlign(CGM);
1353  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
1354  if (!Entry) {
1355  if (!DefaultOpenMPPSource) {
1356  // Initialize default location for psource field of ident_t structure of
1357  // all ident_t objects. Format is ";file;function;line;column;;".
1358  // Taken from
1359  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
1360  DefaultOpenMPPSource =
1361  CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1362  DefaultOpenMPPSource =
1363  llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1364  }
1365 
1366  ConstantInitBuilder builder(CGM);
1367  auto fields = builder.beginStruct(IdentTy);
1368  fields.addInt(CGM.Int32Ty, 0);
1369  fields.addInt(CGM.Int32Ty, Flags);
1370  fields.addInt(CGM.Int32Ty, 0);
1371  fields.addInt(CGM.Int32Ty, 0);
1372  fields.add(DefaultOpenMPPSource);
1373  auto DefaultOpenMPLocation =
1374  fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
1375  llvm::GlobalValue::PrivateLinkage);
1376  DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
1377 
1378  OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
1379  }
1380  return Address(Entry, Align);
1381 }
1382 
1383 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1384  SourceLocation Loc,
1385  unsigned Flags) {
1386  Flags |= OMP_IDENT_KMPC;
1387  // If no debug info is generated - return global default location.
1388  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1389  Loc.isInvalid())
1390  return getOrCreateDefaultLocation(Flags).getPointer();
1391 
1392  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1393 
1394  Address LocValue = Address::invalid();
1395  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1396  if (I != OpenMPLocThreadIDMap.end())
1397  LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
1398 
1399  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1400  // GetOpenMPThreadID was called before this routine.
1401  if (!LocValue.isValid()) {
1402  // Generate "ident_t .kmpc_loc.addr;"
1403  Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
1404  ".kmpc_loc.addr");
1405  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1406  Elem.second.DebugLoc = AI.getPointer();
1407  LocValue = AI;
1408 
1409  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1410  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1411  CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1412  CGM.getSize(getIdentSize(CGF.CGM)));
1413  }
1414 
1415  // char **psource = &.kmpc_loc_<flags>.addr.psource;
1416  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
1417 
1418  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1419  if (OMPDebugLoc == nullptr) {
1420  SmallString<128> Buffer2;
1421  llvm::raw_svector_ostream OS2(Buffer2);
1422  // Build debug location
1423  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1424  OS2 << ";" << PLoc.getFilename() << ";";
1425  if (const FunctionDecl *FD =
1426  dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
1427  OS2 << FD->getQualifiedNameAsString();
1428  }
1429  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1430  OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1431  OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1432  }
1433  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1434  CGF.Builder.CreateStore(OMPDebugLoc, PSource);
1435 
1436  // Our callers always pass this to a runtime function, so for
1437  // convenience, go ahead and return a naked pointer.
1438  return LocValue.getPointer();
1439 }
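// Illustrative example (an editor addition, not part of the original source):
// for a directive at line 10, column 3 of file.c inside function foo, the
// ident_t produced above would look roughly like
//   { 0, OMP_IDENT_KMPC | Flags, 0, 0, ";file.c;foo;10;3;;" }
// with "file.c", "foo", 10 and 3 standing in for the actual presumed location.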
1440 
1441 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1442  SourceLocation Loc) {
1443  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1444 
1445  llvm::Value *ThreadID = nullptr;
1446  // Check whether we've already cached a load of the thread id in this
1447  // function.
1448  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1449  if (I != OpenMPLocThreadIDMap.end()) {
1450  ThreadID = I->second.ThreadID;
1451  if (ThreadID != nullptr)
1452  return ThreadID;
1453  }
1454  // If exceptions are enabled, do not use parameter to avoid possible crash.
1455  if (!CGF.getInvokeDest()) {
1456  if (auto *OMPRegionInfo =
1457  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1458  if (OMPRegionInfo->getThreadIDVariable()) {
1459  // Check if this is an outlined function with the thread id passed as argument.
1460  auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1461  ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
1462  // If the value is loaded in the entry block, cache it and use it everywhere
1463  // in the function.
1464  if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1465  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1466  Elem.second.ThreadID = ThreadID;
1467  }
1468  return ThreadID;
1469  }
1470  }
1471  }
1472 
1473  // This is not an outlined function region - need to call __kmpc_int32
1474  // kmpc_global_thread_num(ident_t *loc).
1475  // Generate thread id value and cache this value for use across the
1476  // function.
1477  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1478  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1479  ThreadID =
1480  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1481  emitUpdateLocation(CGF, Loc));
1482  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1483  Elem.second.ThreadID = ThreadID;
1484  return ThreadID;
1485 }
1486 
1487 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1488  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1489  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1490  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1491  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1492  for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1493  UDRMap.erase(D);
1494  }
1495  FunctionUDRMap.erase(CGF.CurFn);
1496  }
1497 }
1498 
1499 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1500  if (!IdentTy) {
1501  }
1502  return llvm::PointerType::getUnqual(IdentTy);
1503 }
1504 
1505 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1506  if (!Kmpc_MicroTy) {
1507  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1508  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1509  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1510  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1511  }
1512  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1513 }
1514 
1515 llvm::Constant *
1516 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1517  llvm::Constant *RTLFn = nullptr;
1518  switch (static_cast<OpenMPRTLFunction>(Function)) {
1519  case OMPRTL__kmpc_fork_call: {
1520  // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1521  // microtask, ...);
1522  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1523  getKmpc_MicroPointerTy()};
1524  llvm::FunctionType *FnTy =
1525  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1526  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1527  break;
1528  }
1529  case OMPRTL__kmpc_global_thread_num: {
1530  // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1531  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1532  llvm::FunctionType *FnTy =
1533  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1534  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1535  break;
1536  }
1537  case OMPRTL__kmpc_threadprivate_cached: {
1538  // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1539  // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1540  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1541  CGM.VoidPtrTy, CGM.SizeTy,
1542  CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1543  llvm::FunctionType *FnTy =
1544  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1545  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1546  break;
1547  }
1548  case OMPRTL__kmpc_critical: {
1549  // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1550  // kmp_critical_name *crit);
1551  llvm::Type *TypeParams[] = {
1552  getIdentTyPointerTy(), CGM.Int32Ty,
1553  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1554  llvm::FunctionType *FnTy =
1555  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1556  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1557  break;
1558  }
1559  case OMPRTL__kmpc_critical_with_hint: {
1560  // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1561  // kmp_critical_name *crit, uintptr_t hint);
1562  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1563  llvm::PointerType::getUnqual(KmpCriticalNameTy),
1564  CGM.IntPtrTy};
1565  llvm::FunctionType *FnTy =
1566  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1567  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1568  break;
1569  }
1570  case OMPRTL__kmpc_threadprivate_register: {
1571  // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1572  // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1573  // typedef void *(*kmpc_ctor)(void *);
1574  auto KmpcCtorTy =
1575  llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1576  /*isVarArg*/ false)->getPointerTo();
1577  // typedef void *(*kmpc_cctor)(void *, void *);
1578  llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1579  auto KmpcCopyCtorTy =
1580  llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1581  /*isVarArg*/ false)->getPointerTo();
1582  // typedef void (*kmpc_dtor)(void *);
1583  auto KmpcDtorTy =
1584  llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1585  ->getPointerTo();
1586  llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1587  KmpcCopyCtorTy, KmpcDtorTy};
1588  auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1589  /*isVarArg*/ false);
1590  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1591  break;
1592  }
1593  case OMPRTL__kmpc_end_critical: {
1594  // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1595  // kmp_critical_name *crit);
1596  llvm::Type *TypeParams[] = {
1597  getIdentTyPointerTy(), CGM.Int32Ty,
1598  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1599  llvm::FunctionType *FnTy =
1600  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1601  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1602  break;
1603  }
1604  case OMPRTL__kmpc_cancel_barrier: {
1605  // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1606  // global_tid);
1607  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1608  llvm::FunctionType *FnTy =
1609  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1610  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1611  break;
1612  }
1613  case OMPRTL__kmpc_barrier: {
1614  // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1615  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1616  llvm::FunctionType *FnTy =
1617  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1618  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1619  break;
1620  }
1621  case OMPRTL__kmpc_for_static_fini: {
1622  // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1623  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1624  llvm::FunctionType *FnTy =
1625  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1626  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1627  break;
1628  }
1629  case OMPRTL__kmpc_push_num_threads: {
1630  // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1631  // kmp_int32 num_threads)
1632  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1633  CGM.Int32Ty};
1634  llvm::FunctionType *FnTy =
1635  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1636  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1637  break;
1638  }
1639  case OMPRTL__kmpc_serialized_parallel: {
1640  // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1641  // global_tid);
1642  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1643  llvm::FunctionType *FnTy =
1644  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1645  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1646  break;
1647  }
1648  case OMPRTL__kmpc_end_serialized_parallel: {
1649  // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1650  // global_tid);
1651  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1652  llvm::FunctionType *FnTy =
1653  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1654  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1655  break;
1656  }
1657  case OMPRTL__kmpc_flush: {
1658  // Build void __kmpc_flush(ident_t *loc);
1659  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1660  llvm::FunctionType *FnTy =
1661  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1662  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1663  break;
1664  }
1665  case OMPRTL__kmpc_master: {
1666  // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1667  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1668  llvm::FunctionType *FnTy =
1669  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1670  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1671  break;
1672  }
1673  case OMPRTL__kmpc_end_master: {
1674  // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1675  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1676  llvm::FunctionType *FnTy =
1677  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1678  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1679  break;
1680  }
1681  case OMPRTL__kmpc_omp_taskyield: {
1682  // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1683  // int end_part);
1684  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1685  llvm::FunctionType *FnTy =
1686  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1687  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1688  break;
1689  }
1690  case OMPRTL__kmpc_single: {
1691  // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1692  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1693  llvm::FunctionType *FnTy =
1694  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1695  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1696  break;
1697  }
1698  case OMPRTL__kmpc_end_single: {
1699  // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1700  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1701  llvm::FunctionType *FnTy =
1702  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1703  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1704  break;
1705  }
1706  case OMPRTL__kmpc_omp_task_alloc: {
1707  // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1708  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1709  // kmp_routine_entry_t *task_entry);
1710  assert(KmpRoutineEntryPtrTy != nullptr &&
1711  "Type kmp_routine_entry_t must be created.");
1712  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1713  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1714  // Return void * and then cast to particular kmp_task_t type.
1715  llvm::FunctionType *FnTy =
1716  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1717  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1718  break;
1719  }
1720  case OMPRTL__kmpc_omp_task: {
1721  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1722  // *new_task);
1723  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1724  CGM.VoidPtrTy};
1725  llvm::FunctionType *FnTy =
1726  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1727  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1728  break;
1729  }
1730  case OMPRTL__kmpc_copyprivate: {
1731  // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1732  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1733  // kmp_int32 didit);
1734  llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1735  auto *CpyFnTy =
1736  llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1737  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1738  CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1739  CGM.Int32Ty};
1740  llvm::FunctionType *FnTy =
1741  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1742  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1743  break;
1744  }
1745  case OMPRTL__kmpc_reduce: {
1746  // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1747  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1748  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1749  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1750  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1751  /*isVarArg=*/false);
1752  llvm::Type *TypeParams[] = {
1753  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1754  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1755  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1756  llvm::FunctionType *FnTy =
1757  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1758  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1759  break;
1760  }
1761  case OMPRTL__kmpc_reduce_nowait: {
1762  // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1763  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1764  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1765  // *lck);
1766  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1767  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1768  /*isVarArg=*/false);
1769  llvm::Type *TypeParams[] = {
1770  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1771  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1772  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1773  llvm::FunctionType *FnTy =
1774  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1775  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1776  break;
1777  }
1778  case OMPRTL__kmpc_end_reduce: {
1779  // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1780  // kmp_critical_name *lck);
1781  llvm::Type *TypeParams[] = {
1782  getIdentTyPointerTy(), CGM.Int32Ty,
1783  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1784  llvm::FunctionType *FnTy =
1785  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1786  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1787  break;
1788  }
1789  case OMPRTL__kmpc_end_reduce_nowait: {
1790  // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1791  // kmp_critical_name *lck);
1792  llvm::Type *TypeParams[] = {
1793  getIdentTyPointerTy(), CGM.Int32Ty,
1794  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1795  llvm::FunctionType *FnTy =
1796  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1797  RTLFn =
1798  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1799  break;
1800  }
1801  case OMPRTL__kmpc_omp_task_begin_if0: {
1802  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
1803  // kmp_task_t *new_task);
1804  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1805  CGM.VoidPtrTy};
1806  llvm::FunctionType *FnTy =
1807  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1808  RTLFn =
1809  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1810  break;
1811  }
1812  case OMPRTL__kmpc_omp_task_complete_if0: {
1813  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
1814  // kmp_task_t *new_task);
1815  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1816  CGM.VoidPtrTy};
1817  llvm::FunctionType *FnTy =
1818  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1819  RTLFn = CGM.CreateRuntimeFunction(FnTy,
1820  /*Name=*/"__kmpc_omp_task_complete_if0");
1821  break;
1822  }
1823  case OMPRTL__kmpc_ordered: {
1824  // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1825  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1826  llvm::FunctionType *FnTy =
1827  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1828  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1829  break;
1830  }
1831  case OMPRTL__kmpc_end_ordered: {
1832  // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1833  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1834  llvm::FunctionType *FnTy =
1835  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1836  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1837  break;
1838  }
1839  case OMPRTL__kmpc_omp_taskwait: {
1840  // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1841  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1842  llvm::FunctionType *FnTy =
1843  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1844  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1845  break;
1846  }
1847  case OMPRTL__kmpc_taskgroup: {
1848  // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1849  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1850  llvm::FunctionType *FnTy =
1851  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1852  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1853  break;
1854  }
1855  case OMPRTL__kmpc_end_taskgroup: {
1856  // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1857  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1858  llvm::FunctionType *FnTy =
1859  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1860  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1861  break;
1862  }
1863  case OMPRTL__kmpc_push_proc_bind: {
1864  // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1865  // int proc_bind)
1866  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1867  llvm::FunctionType *FnTy =
1868  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1869  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1870  break;
1871  }
1872  case OMPRTL__kmpc_omp_task_with_deps: {
1873  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1874  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1875  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1876  llvm::Type *TypeParams[] = {
1877  getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1878  CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
1879  llvm::FunctionType *FnTy =
1880  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1881  RTLFn =
1882  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1883  break;
1884  }
1885  case OMPRTL__kmpc_omp_wait_deps: {
1886  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1887  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1888  // kmp_depend_info_t *noalias_dep_list);
1889  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1890  CGM.Int32Ty, CGM.VoidPtrTy,
1891  CGM.Int32Ty, CGM.VoidPtrTy};
1892  llvm::FunctionType *FnTy =
1893  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1894  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1895  break;
1896  }
1897  case OMPRTL__kmpc_cancellationpoint: {
1898  // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1899  // global_tid, kmp_int32 cncl_kind)
1900  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1901  llvm::FunctionType *FnTy =
1902  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1903  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1904  break;
1905  }
1906  case OMPRTL__kmpc_cancel: {
1907  // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1908  // kmp_int32 cncl_kind)
1909  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1910  llvm::FunctionType *FnTy =
1911  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1912  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1913  break;
1914  }
1915  case OMPRTL__kmpc_push_num_teams: {
1916  // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
1917  // kmp_int32 num_teams, kmp_int32 num_threads)
1918  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1919  CGM.Int32Ty};
1920  llvm::FunctionType *FnTy =
1921  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1922  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1923  break;
1924  }
1925  case OMPRTL__kmpc_fork_teams: {
1926  // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1927  // microtask, ...);
1928  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1929  getKmpc_MicroPointerTy()};
1930  llvm::FunctionType *FnTy =
1931  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1932  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1933  break;
1934  }
1935  case OMPRTL__kmpc_taskloop: {
1936  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1937  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1938  // sched, kmp_uint64 grainsize, void *task_dup);
1939  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1940  CGM.IntTy,
1941  CGM.VoidPtrTy,
1942  CGM.IntTy,
1943  CGM.Int64Ty->getPointerTo(),
1944  CGM.Int64Ty->getPointerTo(),
1945  CGM.Int64Ty,
1946  CGM.IntTy,
1947  CGM.IntTy,
1948  CGM.Int64Ty,
1949  CGM.VoidPtrTy};
1950  llvm::FunctionType *FnTy =
1951  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1952  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1953  break;
1954  }
1955  case OMPRTL__kmpc_doacross_init: {
1956  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
1957  // num_dims, struct kmp_dim *dims);
1958  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1959  CGM.Int32Ty,
1960  CGM.Int32Ty,
1961  CGM.VoidPtrTy};
1962  llvm::FunctionType *FnTy =
1963  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1964  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
1965  break;
1966  }
1967  case OMPRTL__kmpc_doacross_fini: {
1968  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
1969  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1970  llvm::FunctionType *FnTy =
1971  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1972  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
1973  break;
1974  }
1975  case OMPRTL__kmpc_doacross_post: {
1976  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
1977  // *vec);
1978  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1979  CGM.Int64Ty->getPointerTo()};
1980  llvm::FunctionType *FnTy =
1981  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1982  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
1983  break;
1984  }
1985  case OMPRTL__kmpc_doacross_wait: {
1986  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
1987  // *vec);
1988  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1989  CGM.Int64Ty->getPointerTo()};
1990  llvm::FunctionType *FnTy =
1991  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1992  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
1993  break;
1994  }
1995  case OMPRTL__kmpc_task_reduction_init: {
1996  // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
1997  // *data);
1998  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
1999  llvm::FunctionType *FnTy =
2000  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2001  RTLFn =
2002  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2003  break;
2004  }
2005  case OMPRTL__kmpc_task_reduction_get_th_data: {
2006  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2007  // *d);
2008  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2009  llvm::FunctionType *FnTy =
2010  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2011  RTLFn = CGM.CreateRuntimeFunction(
2012  FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2013  break;
2014  }
2015  case OMPRTL__tgt_target: {
2016  // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
2017  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
2018  // *arg_types);
2019  llvm::Type *TypeParams[] = {CGM.Int32Ty,
2020  CGM.VoidPtrTy,
2021  CGM.Int32Ty,
2022  CGM.VoidPtrPtrTy,
2023  CGM.VoidPtrPtrTy,
2024  CGM.SizeTy->getPointerTo(),
2025  CGM.Int32Ty->getPointerTo()};
2026  llvm::FunctionType *FnTy =
2027  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2028  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2029  break;
2030  }
2031  case OMPRTL__tgt_target_teams: {
2032  // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
2033  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2034  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
2035  llvm::Type *TypeParams[] = {CGM.Int32Ty,
2036  CGM.VoidPtrTy,
2037  CGM.Int32Ty,
2038  CGM.VoidPtrPtrTy,
2039  CGM.VoidPtrPtrTy,
2040  CGM.SizeTy->getPointerTo(),
2041  CGM.Int32Ty->getPointerTo(),
2042  CGM.Int32Ty,
2043  CGM.Int32Ty};
2044  llvm::FunctionType *FnTy =
2045  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2046  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2047  break;
2048  }
2049  case OMPRTL__tgt_register_lib: {
2050  // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2051  QualType ParamTy =
2053  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2054  llvm::FunctionType *FnTy =
2055  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2056  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2057  break;
2058  }
2059  case OMPRTL__tgt_unregister_lib: {
2060  // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2061  QualType ParamTy =
2063  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2064  llvm::FunctionType *FnTy =
2065  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2066  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2067  break;
2068  }
2069  case OMPRTL__tgt_target_data_begin: {
2070  // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
2071  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
2072  llvm::Type *TypeParams[] = {CGM.Int32Ty,
2073  CGM.Int32Ty,
2074  CGM.VoidPtrPtrTy,
2075  CGM.VoidPtrPtrTy,
2076  CGM.SizeTy->getPointerTo(),
2077  CGM.Int32Ty->getPointerTo()};
2078  llvm::FunctionType *FnTy =
2079  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2080  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2081  break;
2082  }
2083  case OMPRTL__tgt_target_data_end: {
2084  // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
2085  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
2086  llvm::Type *TypeParams[] = {CGM.Int32Ty,
2087  CGM.Int32Ty,
2088  CGM.VoidPtrPtrTy,
2089  CGM.VoidPtrPtrTy,
2090  CGM.SizeTy->getPointerTo(),
2091  CGM.Int32Ty->getPointerTo()};
2092  llvm::FunctionType *FnTy =
2093  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2094  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2095  break;
2096  }
2097  case OMPRTL__tgt_target_data_update: {
2098  // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
2099  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
2100  llvm::Type *TypeParams[] = {CGM.Int32Ty,
2101  CGM.Int32Ty,
2102  CGM.VoidPtrPtrTy,
2103  CGM.VoidPtrPtrTy,
2104  CGM.SizeTy->getPointerTo(),
2105  CGM.Int32Ty->getPointerTo()};
2106  llvm::FunctionType *FnTy =
2107  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2108  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2109  break;
2110  }
2111  }
2112  assert(RTLFn && "Unable to find OpenMP runtime function");
2113  return RTLFn;
2114 }
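A minimal usage sketch (illustrative; it mirrors call sites later in this file): a caller asks for the runtime entry point and emits a call through the CodeGenFunction:

    // assuming CGF and Loc are in scope, as at the later call sites
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);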
2115 
2116 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2117  bool IVSigned) {
2118  assert((IVSize == 32 || IVSize == 64) &&
2119  "IV size is not compatible with the omp runtime");
2120  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2121  : "__kmpc_for_static_init_4u")
2122  : (IVSigned ? "__kmpc_for_static_init_8"
2123  : "__kmpc_for_static_init_8u");
2124  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2125  auto PtrTy = llvm::PointerType::getUnqual(ITy);
2126  llvm::Type *TypeParams[] = {
2127  getIdentTyPointerTy(), // loc
2128  CGM.Int32Ty, // tid
2129  CGM.Int32Ty, // schedtype
2130  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2131  PtrTy, // p_lower
2132  PtrTy, // p_upper
2133  PtrTy, // p_stride
2134  ITy, // incr
2135  ITy // chunk
2136  };
2137  llvm::FunctionType *FnTy =
2138  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2139  return CGM.CreateRuntimeFunction(FnTy, Name);
2140 }
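Worked example of the name selection above (derived directly from the conditional):

    IVSize == 32, IVSigned == true   ->  "__kmpc_for_static_init_4"
    IVSize == 32, IVSigned == false  ->  "__kmpc_for_static_init_4u"
    IVSize == 64, IVSigned == true   ->  "__kmpc_for_static_init_8"
    IVSize == 64, IVSigned == false  ->  "__kmpc_for_static_init_8u"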
2141 
2142 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2143  bool IVSigned) {
2144  assert((IVSize == 32 || IVSize == 64) &&
2145  "IV size is not compatible with the omp runtime");
2146  auto Name =
2147  IVSize == 32
2148  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2149  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2150  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2151  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2152  CGM.Int32Ty, // tid
2153  CGM.Int32Ty, // schedtype
2154  ITy, // lower
2155  ITy, // upper
2156  ITy, // stride
2157  ITy // chunk
2158  };
2159  llvm::FunctionType *FnTy =
2160  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2161  return CGM.CreateRuntimeFunction(FnTy, Name);
2162 }
2163 
2164 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2165  bool IVSigned) {
2166  assert((IVSize == 32 || IVSize == 64) &&
2167  "IV size is not compatible with the omp runtime");
2168  auto Name =
2169  IVSize == 32
2170  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2171  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2172  llvm::Type *TypeParams[] = {
2173  getIdentTyPointerTy(), // loc
2174  CGM.Int32Ty, // tid
2175  };
2176  llvm::FunctionType *FnTy =
2177  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2178  return CGM.CreateRuntimeFunction(FnTy, Name);
2179 }
2180 
2181 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2182  bool IVSigned) {
2183  assert((IVSize == 32 || IVSize == 64) &&
2184  "IV size is not compatible with the omp runtime");
2185  auto Name =
2186  IVSize == 32
2187  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2188  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2189  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2190  auto PtrTy = llvm::PointerType::getUnqual(ITy);
2191  llvm::Type *TypeParams[] = {
2192  getIdentTyPointerTy(), // loc
2193  CGM.Int32Ty, // tid
2194  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2195  PtrTy, // p_lower
2196  PtrTy, // p_upper
2197  PtrTy // p_stride
2198  };
2199  llvm::FunctionType *FnTy =
2200  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2201  return CGM.CreateRuntimeFunction(FnTy, Name);
2202 }
2203 
2204 llvm::Constant *
2205 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2206  assert(!CGM.getLangOpts().OpenMPUseTLS ||
2207  !CGM.getContext().getTargetInfo().isTLSSupported());
2208  // Lookup the entry, lazily creating it if necessary.
2209  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
2210  Twine(CGM.getMangledName(VD)) + ".cache.");
2211 }
2212 
2213 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2214  const VarDecl *VD,
2215  Address VDAddr,
2216  SourceLocation Loc) {
2217  if (CGM.getLangOpts().OpenMPUseTLS &&
2218  CGM.getContext().getTargetInfo().isTLSSupported())
2219  return VDAddr;
2220 
2221  auto VarTy = VDAddr.getElementType();
2222  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2223  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2224  CGM.Int8PtrTy),
2227  return Address(CGF.EmitRuntimeCall(
2229  VDAddr.getAlignment());
2230 }
2231 
2232 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2233  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2234  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2235  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2236  // library.
2237  auto OMPLoc = emitUpdateLocation(CGF, Loc);
2238  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2239  OMPLoc);
2240  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2241  // to register constructor/destructor for variable.
2242  llvm::Value *Args[] = {OMPLoc,
2243  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2244  CGM.VoidPtrTy),
2245  Ctor, CopyCtor, Dtor};
2246  CGF.EmitRuntimeCall(
2247  createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2248 }
2249 
2250 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2251  const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2252  bool PerformInit, CodeGenFunction *CGF) {
2253  if (CGM.getLangOpts().OpenMPUseTLS &&
2254  CGM.getContext().getTargetInfo().isTLSSupported())
2255  return nullptr;
2256 
2257  VD = VD->getDefinition(CGM.getContext());
2258  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
2259  ThreadPrivateWithDefinition.insert(VD);
2260  QualType ASTTy = VD->getType();
2261 
2262  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2263  auto Init = VD->getAnyInitializer();
2264  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2265  // Generate function that re-emits the declaration's initializer into the
2266  // threadprivate copy of the variable VD
2267  CodeGenFunction CtorCGF(CGM);
2268  FunctionArgList Args;
2271  Args.push_back(&Dst);
2272 
2274  CGM.getContext().VoidPtrTy, Args);
2275  auto FTy = CGM.getTypes().GetFunctionType(FI);
2277  FTy, ".__kmpc_global_ctor_.", FI, Loc);
2278  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2279  Args, SourceLocation());
2280  auto ArgVal = CtorCGF.EmitLoadOfScalar(
2281  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2282  CGM.getContext().VoidPtrTy, Dst.getLocation());
2283  Address Arg = Address(ArgVal, VDAddr.getAlignment());
2284  Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
2285  CtorCGF.ConvertTypeForMem(ASTTy));
2286  CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2287  /*IsInitializer=*/true);
2288  ArgVal = CtorCGF.EmitLoadOfScalar(
2289  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2290  CGM.getContext().VoidPtrTy, Dst.getLocation());
2291  CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2292  CtorCGF.FinishFunction();
2293  Ctor = Fn;
2294  }
2295  if (VD->getType().isDestructedType() != QualType::DK_none) {
2296  // Generate function that emits destructor call for the threadprivate copy
2297  // of the variable VD
2298  CodeGenFunction DtorCGF(CGM);
2299  FunctionArgList Args;
2302  Args.push_back(&Dst);
2303 
2305  CGM.getContext().VoidTy, Args);
2306  auto FTy = CGM.getTypes().GetFunctionType(FI);
2308  FTy, ".__kmpc_global_dtor_.", FI, Loc);
2309  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2310  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2311  SourceLocation());
2312  // Create a scope with an artificial location for the body of this function.
2313  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2314  auto ArgVal = DtorCGF.EmitLoadOfScalar(
2315  DtorCGF.GetAddrOfLocalVar(&Dst),
2316  /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2317  DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2318  DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2319  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2320  DtorCGF.FinishFunction();
2321  Dtor = Fn;
2322  }
2323  // Do not emit init function if it is not required.
2324  if (!Ctor && !Dtor)
2325  return nullptr;
2326 
2327  llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2328  auto CopyCtorTy =
2329  llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2330  /*isVarArg=*/false)->getPointerTo();
2331  // Copying constructor for the threadprivate variable.
2332  // Must be NULL - the runtime reserves this parameter and currently requires
2333  // it to always be NULL; otherwise it fires an assertion.
2334  CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2335  if (Ctor == nullptr) {
2336  auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2337  /*isVarArg=*/false)->getPointerTo();
2338  Ctor = llvm::Constant::getNullValue(CtorTy);
2339  }
2340  if (Dtor == nullptr) {
2341  auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2342  /*isVarArg=*/false)->getPointerTo();
2343  Dtor = llvm::Constant::getNullValue(DtorTy);
2344  }
2345  if (!CGF) {
2346  auto InitFunctionTy =
2347  llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2348  auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2349  InitFunctionTy, ".__omp_threadprivate_init_.",
2351  CodeGenFunction InitCGF(CGM);
2352  FunctionArgList ArgList;
2353  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2354  CGM.getTypes().arrangeNullaryFunction(), ArgList,
2355  Loc);
2356  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2357  InitCGF.FinishFunction();
2358  return InitFunction;
2359  }
2360  emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2361  }
2362  return nullptr;
2363 }
2364 
2365 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2366  QualType VarType,
2367  StringRef Name) {
2368  llvm::Twine VarName(Name, ".artificial.");
2369  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2370  llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
2371  llvm::Value *Args[] = {
2373  getThreadID(CGF, SourceLocation()),
2375  CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2376  /*IsSigned=*/false),
2377  getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
2378  return Address(
2380  CGF.EmitRuntimeCall(
2382  VarLVType->getPointerTo(/*AddrSpace=*/0)),
2383  CGM.getPointerAlign());
2384 }
2385 
2386 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
2387 /// function. Here is the logic:
2388 /// if (Cond) {
2389 /// ThenGen();
2390 /// } else {
2391 /// ElseGen();
2392 /// }
2393 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2394  const RegionCodeGenTy &ThenGen,
2395  const RegionCodeGenTy &ElseGen) {
2396  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2397 
2398  // If the condition constant folds and can be elided, try to avoid emitting
2399  // the condition and the dead arm of the if/else.
2400  bool CondConstant;
2401  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2402  if (CondConstant)
2403  ThenGen(CGF);
2404  else
2405  ElseGen(CGF);
2406  return;
2407  }
2408 
2409  // Otherwise, the condition did not fold, or we couldn't elide it. Just
2410  // emit the conditional branch.
2411  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
2412  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
2413  auto ContBlock = CGF.createBasicBlock("omp_if.end");
2414  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2415 
2416  // Emit the 'then' code.
2417  CGF.EmitBlock(ThenBlock);
2418  ThenGen(CGF);
2419  CGF.EmitBranch(ContBlock);
2420  // Emit the 'else' code if present.
2421  // There is no need to emit line number for unconditional branch.
2423  CGF.EmitBlock(ElseBlock);
2424  ElseGen(CGF);
2425  // There is no need to emit line number for unconditional branch.
2427  CGF.EmitBranch(ContBlock);
2428  // Emit the continuation block for code after the if.
2429  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2430 }
2431 
2432 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2433  llvm::Value *OutlinedFn,
2434  ArrayRef<llvm::Value *> CapturedVars,
2435  const Expr *IfCond) {
2436  if (!CGF.HaveInsertPoint())
2437  return;
2438  auto *RTLoc = emitUpdateLocation(CGF, Loc);
2439  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2440  PrePostActionTy &) {
2441  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2442  auto &RT = CGF.CGM.getOpenMPRuntime();
2443  llvm::Value *Args[] = {
2444  RTLoc,
2445  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2446  CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2448  RealArgs.append(std::begin(Args), std::end(Args));
2449  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2450 
2451  auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2452  CGF.EmitRuntimeCall(RTLFn, RealArgs);
2453  };
2454  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2455  PrePostActionTy &) {
2456  auto &RT = CGF.CGM.getOpenMPRuntime();
2457  auto ThreadID = RT.getThreadID(CGF, Loc);
2458  // Build calls:
2459  // __kmpc_serialized_parallel(&Loc, GTid);
2460  llvm::Value *Args[] = {RTLoc, ThreadID};
2461  CGF.EmitRuntimeCall(
2462  RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2463 
2464  // OutlinedFn(&GTid, &zero, CapturedStruct);
2465  auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2466  Address ZeroAddr =
2467  CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
2468  /*Name*/ ".zero.addr");
2469  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2470  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2471  OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2472  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2473  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2474  RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2475 
2476  // __kmpc_end_serialized_parallel(&Loc, GTid);
2477  llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2478  CGF.EmitRuntimeCall(
2479  RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2480  EndArgs);
2481  };
2482  if (IfCond)
2483  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2484  else {
2485  RegionCodeGenTy ThenRCG(ThenGen);
2486  ThenRCG(CGF);
2487  }
2488 }
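Rough sketch (illustrative, not verbatim output) of what emitParallelCall lowers a parallel region to when an if clause is present; .omp_outlined. stands for the outlined function:

    if (cond) {
      __kmpc_fork_call(&loc, /*argc=*/n, (kmpc_micro)&.omp_outlined., var1, ..., varn);
    } else {
      kmp_int32 gtid = __kmpc_global_thread_num(&loc);
      __kmpc_serialized_parallel(&loc, gtid);
      .omp_outlined.(&gtid, &zero, var1, ..., varn);
      __kmpc_end_serialized_parallel(&loc, gtid);
    }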
2489 
2490 // If we're inside an (outlined) parallel region, use the region info's
2491 // thread-ID variable (it is passed as the first argument of the outlined function
2492 // as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel region but in
2493 // a regular serial code region, get the thread ID by calling kmp_int32
2494 // __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2495 // return the address of that temp.
2496 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2497  SourceLocation Loc) {
2498  if (auto *OMPRegionInfo =
2499  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2500  if (OMPRegionInfo->getThreadIDVariable())
2501  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2502 
2503  auto ThreadID = getThreadID(CGF, Loc);
2504  auto Int32Ty =
2505  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2506  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2507  CGF.EmitStoreOfScalar(ThreadID,
2508  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2509 
2510  return ThreadIDTemp;
2511 }
2512 
2513 llvm::Constant *
2514 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2515  const llvm::Twine &Name) {
2516  SmallString<256> Buffer;
2517  llvm::raw_svector_ostream Out(Buffer);
2518  Out << Name;
2519  auto RuntimeName = Out.str();
2520  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2521  if (Elem.second) {
2522  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2523  "OMP internal variable has different type than requested");
2524  return &*Elem.second;
2525  }
2526 
2527  return Elem.second = new llvm::GlobalVariable(
2528  CGM.getModule(), Ty, /*IsConstant*/ false,
2529  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2530  Elem.first());
2531 }
2532 
2533 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2534  llvm::Twine Name(".gomp_critical_user_", CriticalName);
2535  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2536 }
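For example (illustrative): '#pragma omp critical (foo)' maps to an internal common-linkage global lock named

    .gomp_critical_user_foo.var

of type KmpCriticalNameTy, shared by every use of that critical name in the module.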
2537 
2538 namespace {
2539 /// Common pre(post)-action for different OpenMP constructs.
2540 class CommonActionTy final : public PrePostActionTy {
2541  llvm::Value *EnterCallee;
2542  ArrayRef<llvm::Value *> EnterArgs;
2543  llvm::Value *ExitCallee;
2544  ArrayRef<llvm::Value *> ExitArgs;
2545  bool Conditional;
2546  llvm::BasicBlock *ContBlock = nullptr;
2547 
2548 public:
2549  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2550  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2551  bool Conditional = false)
2552  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2553  ExitArgs(ExitArgs), Conditional(Conditional) {}
2554  void Enter(CodeGenFunction &CGF) override {
2555  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2556  if (Conditional) {
2557  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2558  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2559  ContBlock = CGF.createBasicBlock("omp_if.end");
2560  // Generate the branch (If-stmt)
2561  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2562  CGF.EmitBlock(ThenBlock);
2563  }
2564  }
2565  void Done(CodeGenFunction &CGF) {
2566  // Emit the rest of blocks/branches
2567  CGF.EmitBranch(ContBlock);
2568  CGF.EmitBlock(ContBlock, true);
2569  }
2570  void Exit(CodeGenFunction &CGF) override {
2571  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2572  }
2573 };
2574 } // anonymous namespace
2575 
2576 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2577  StringRef CriticalName,
2578  const RegionCodeGenTy &CriticalOpGen,
2579  SourceLocation Loc, const Expr *Hint) {
2580  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2581  // CriticalOpGen();
2582  // __kmpc_end_critical(ident_t *, gtid, Lock);
2583  // Prepare arguments and build a call to __kmpc_critical
2584  if (!CGF.HaveInsertPoint())
2585  return;
2586  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2587  getCriticalRegionLock(CriticalName)};
2588  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2589  std::end(Args));
2590  if (Hint) {
2591  EnterArgs.push_back(CGF.Builder.CreateIntCast(
2592  CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2593  }
2594  CommonActionTy Action(
2595  createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2596  : OMPRTL__kmpc_critical),
2597  EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2598  CriticalOpGen.setAction(Action);
2599  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2600 }
2601 
2602 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2603  const RegionCodeGenTy &MasterOpGen,
2604  SourceLocation Loc) {
2605  if (!CGF.HaveInsertPoint())
2606  return;
2607  // if(__kmpc_master(ident_t *, gtid)) {
2608  // MasterOpGen();
2609  // __kmpc_end_master(ident_t *, gtid);
2610  // }
2611  // Prepare arguments and build a call to __kmpc_master
2612  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2613  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2614  createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2615  /*Conditional=*/true);
2616  MasterOpGen.setAction(Action);
2617  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2618  Action.Done(CGF);
2619 }
2620 
2621 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2622  SourceLocation Loc) {
2623  if (!CGF.HaveInsertPoint())
2624  return;
2625  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2626  llvm::Value *Args[] = {
2627  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2628  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2629  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2630  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2631  Region->emitUntiedSwitch(CGF);
2632 }
2633 
2634 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2635  const RegionCodeGenTy &TaskgroupOpGen,
2636  SourceLocation Loc) {
2637  if (!CGF.HaveInsertPoint())
2638  return;
2639  // __kmpc_taskgroup(ident_t *, gtid);
2640  // TaskgroupOpGen();
2641  // __kmpc_end_taskgroup(ident_t *, gtid);
2642  // Prepare arguments and build a call to __kmpc_taskgroup
2643  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2644  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2645  createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2646  Args);
2647  TaskgroupOpGen.setAction(Action);
2648  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2649 }
2650 
2651 /// Given an array of pointers to variables, project the address of a
2652 /// given variable.
2653 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2654  unsigned Index, const VarDecl *Var) {
2655  // Pull out the pointer to the variable.
2656  Address PtrAddr =
2657  CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2658  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2659 
2660  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2661  Addr = CGF.Builder.CreateElementBitCast(
2662  Addr, CGF.ConvertTypeForMem(Var->getType()));
2663  return Addr;
2664 }
2665 
2666 static llvm::Value *emitCopyprivateCopyFunction(
2667  CodeGenModule &CGM, llvm::Type *ArgsType,
2668  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2669  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2670  auto &C = CGM.getContext();
2671  // void copy_func(void *LHSArg, void *RHSArg);
2672  FunctionArgList Args;
2673  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
2674  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
2675  Args.push_back(&LHSArg);
2676  Args.push_back(&RHSArg);
2677  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2678  auto *Fn = llvm::Function::Create(
2680  ".omp.copyprivate.copy_func", &CGM.getModule());
2681  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2682  CodeGenFunction CGF(CGM);
2683  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2684  // Dest = (void*[n])(LHSArg);
2685  // Src = (void*[n])(RHSArg);
2687  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2688  ArgsType), CGF.getPointerAlign());
2690  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2691  ArgsType), CGF.getPointerAlign());
2692  // *(Type0*)Dst[0] = *(Type0*)Src[0];
2693  // *(Type1*)Dst[1] = *(Type1*)Src[1];
2694  // ...
2695  // *(Typen*)Dst[n] = *(Typen*)Src[n];
2696  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2697  auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2698  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2699 
2700  auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2701  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2702 
2703  auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2704  QualType Type = VD->getType();
2705  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2706  }
2707  CGF.FinishFunction();
2708  return Fn;
2709 }
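In spirit, the generated helper looks like the following C sketch (illustrative; T0..Tn stand for the copyprivate variable types):

    static void omp_copyprivate_copy_func(void *LHSArg, void *RHSArg) {
      void **Dst = (void **)LHSArg;   // destination pointer array
      void **Src = (void **)RHSArg;   // source pointer array
      *(T0 *)Dst[0] = *(T0 *)Src[0];  // one assignment per copyprivate variable
      /* ... */
    }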
2710 
2711 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2712  const RegionCodeGenTy &SingleOpGen,
2713  SourceLocation Loc,
2714  ArrayRef<const Expr *> CopyprivateVars,
2715  ArrayRef<const Expr *> SrcExprs,
2716  ArrayRef<const Expr *> DstExprs,
2717  ArrayRef<const Expr *> AssignmentOps) {
2718  if (!CGF.HaveInsertPoint())
2719  return;
2720  assert(CopyprivateVars.size() == SrcExprs.size() &&
2721  CopyprivateVars.size() == DstExprs.size() &&
2722  CopyprivateVars.size() == AssignmentOps.size());
2723  auto &C = CGM.getContext();
2724  // int32 did_it = 0;
2725  // if(__kmpc_single(ident_t *, gtid)) {
2726  // SingleOpGen();
2727  // __kmpc_end_single(ident_t *, gtid);
2728  // did_it = 1;
2729  // }
2730  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2731  // <copy_func>, did_it);
2732 
2733  Address DidIt = Address::invalid();
2734  if (!CopyprivateVars.empty()) {
2735  // int32 did_it = 0;
2736  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2737  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2738  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2739  }
2740  // Prepare arguments and build a call to __kmpc_single
2741  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2742  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2743  createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
2744  /*Conditional=*/true);
2745  SingleOpGen.setAction(Action);
2746  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2747  if (DidIt.isValid()) {
2748  // did_it = 1;
2749  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2750  }
2751  Action.Done(CGF);
2752  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2753  // <copy_func>, did_it);
2754  if (DidIt.isValid()) {
2755  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2756  auto CopyprivateArrayTy =
2757  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2758  /*IndexTypeQuals=*/0);
2759  // Create a list of all private variables for copyprivate.
2760  Address CopyprivateList =
2761  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2762  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2763  Address Elem = CGF.Builder.CreateConstArrayGEP(
2764  CopyprivateList, I, CGF.getPointerSize());
2765  CGF.Builder.CreateStore(
2767  CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2768  Elem);
2769  }
2770  // Build a function that copies private values from the single region to all
2771  // other threads in the corresponding parallel region.
2772  auto *CpyFn = emitCopyprivateCopyFunction(
2773  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2774  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2775  auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2776  Address CL =
2777  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2778  CGF.VoidPtrTy);
2779  auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2780  llvm::Value *Args[] = {
2781  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2782  getThreadID(CGF, Loc), // i32 <gtid>
2783  BufSize, // size_t <buf_size>
2784  CL.getPointer(), // void *<copyprivate list>
2785  CpyFn, // void (*) (void *, void *) <copy_func>
2786  DidItVal // i32 did_it
2787  };
2788  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
2789  }
2790 }
2791 
2792 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2793  const RegionCodeGenTy &OrderedOpGen,
2794  SourceLocation Loc, bool IsThreads) {
2795  if (!CGF.HaveInsertPoint())
2796  return;
2797  // __kmpc_ordered(ident_t *, gtid);
2798  // OrderedOpGen();
2799  // __kmpc_end_ordered(ident_t *, gtid);
2800  // Prepare arguments and build a call to __kmpc_ordered
2801  if (IsThreads) {
2802  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2803  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2804  createRuntimeFunction(OMPRTL__kmpc_end_ordered),
2805  Args);
2806  OrderedOpGen.setAction(Action);
2807  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2808  return;
2809  }
2810  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2811 }
2812 
2813 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2814  OpenMPDirectiveKind Kind, bool EmitChecks,
2815  bool ForceSimpleCall) {
2816  if (!CGF.HaveInsertPoint())
2817  return;
2818  // Build call __kmpc_cancel_barrier(loc, thread_id);
2819  // Build call __kmpc_barrier(loc, thread_id);
2820  unsigned Flags;
2821  if (Kind == OMPD_for)
2822  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2823  else if (Kind == OMPD_sections)
2824  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2825  else if (Kind == OMPD_single)
2826  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2827  else if (Kind == OMPD_barrier)
2828  Flags = OMP_IDENT_BARRIER_EXPL;
2829  else
2830  Flags = OMP_IDENT_BARRIER_IMPL;
2831  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2832  // thread_id);
2833  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2834  getThreadID(CGF, Loc)};
2835  if (auto *OMPRegionInfo =
2836  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2837  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2838  auto *Result = CGF.EmitRuntimeCall(
2839  createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
2840  if (EmitChecks) {
2841  // if (__kmpc_cancel_barrier()) {
2842  // exit from construct;
2843  // }
2844  auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2845  auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2846  auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2847  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2848  CGF.EmitBlock(ExitBB);
2849  // exit from construct;
2850  auto CancelDestination =
2851  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2852  CGF.EmitBranchThroughCleanup(CancelDestination);
2853  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2854  }
2855  return;
2856  }
2857  }
2858  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2859 }
2860 
2861 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
2862 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2863  bool Chunked, bool Ordered) {
2864  switch (ScheduleKind) {
2865  case OMPC_SCHEDULE_static:
2866  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2867  : (Ordered ? OMP_ord_static : OMP_sch_static);
2868  case OMPC_SCHEDULE_dynamic:
2869  return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2870  case OMPC_SCHEDULE_guided:
2871  return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2872  case OMPC_SCHEDULE_runtime:
2873  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2874  case OMPC_SCHEDULE_auto:
2875  return Ordered ? OMP_ord_auto : OMP_sch_auto;
2876  case OMPC_SCHEDULE_unknown:
2877  assert(!Chunked && "chunk was specified but schedule kind not known");
2878  return Ordered ? OMP_ord_static : OMP_sch_static;
2879  }
2880  llvm_unreachable("Unexpected runtime schedule");
2881 }
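Worked examples of the mapping (illustrative, taken from the cases above):

    schedule(static)      ->  OMP_sch_static          (OMP_ord_static if ordered)
    schedule(static, N)   ->  OMP_sch_static_chunked  (OMP_ord_static_chunked if ordered)
    schedule(auto)        ->  OMP_sch_auto            (OMP_ord_auto if ordered)
    no schedule clause    ->  OMP_sch_static / OMP_ord_static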
2882 
2883 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2884 static OpenMPSchedType
2885 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2886  // only static is allowed for dist_schedule
2887  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2888 }
2889 
2890 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2891  bool Chunked) const {
2892  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2893  return Schedule == OMP_sch_static;
2894 }
2895 
2896 bool CGOpenMPRuntime::isStaticNonchunked(
2897  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2898  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2899  return Schedule == OMP_dist_sch_static;
2900 }
2901 
2902 
2903 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2904  auto Schedule =
2905  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2906  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2907  return Schedule != OMP_sch_static;
2908 }
2909 
2910 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
2911  OpenMPScheduleClauseModifier M1,
2912  OpenMPScheduleClauseModifier M2) {
2913  int Modifier = 0;
2914  switch (M1) {
2915  case OMPC_SCHEDULE_MODIFIER_monotonic:
2916  Modifier = OMP_sch_modifier_monotonic;
2917  break;
2918  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2919  Modifier = OMP_sch_modifier_nonmonotonic;
2920  break;
2921  case OMPC_SCHEDULE_MODIFIER_simd:
2922  if (Schedule == OMP_sch_static_chunked)
2923  Schedule = OMP_sch_static_balanced_chunked;
2924  break;
2925  case OMPC_SCHEDULE_MODIFIER_last:
2926  case OMPC_SCHEDULE_MODIFIER_unknown:
2927  break;
2928  }
2929  switch (M2) {
2930  case OMPC_SCHEDULE_MODIFIER_monotonic:
2931  Modifier = OMP_sch_modifier_monotonic;
2932  break;
2933  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2934  Modifier = OMP_sch_modifier_nonmonotonic;
2935  break;
2936  case OMPC_SCHEDULE_MODIFIER_simd:
2937  if (Schedule == OMP_sch_static_chunked)
2938  Schedule = OMP_sch_static_balanced_chunked;
2939  break;
2940  case OMPC_SCHEDULE_MODIFIER_last:
2941  case OMPC_SCHEDULE_MODIFIER_unknown:
2942  break;
2943  }
2944  return Schedule | Modifier;
2945 }
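For instance (illustrative, using only values visible above):

    addMonoNonMonoModifier(OMP_sch_static, OMPC_SCHEDULE_MODIFIER_monotonic,
                           OMPC_SCHEDULE_MODIFIER_unknown)
        == (OMP_sch_static | OMP_sch_modifier_monotonic)

and the simd modifier upgrades OMP_sch_static_chunked to OMP_sch_static_balanced_chunked before the modifier bits are OR'd in.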
2946 
2947 void CGOpenMPRuntime::emitForDispatchInit(
2948  CodeGenFunction &CGF, SourceLocation Loc,
2949  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2950  bool Ordered, const DispatchRTInput &DispatchValues) {
2951  if (!CGF.HaveInsertPoint())
2952  return;
2953  OpenMPSchedType Schedule = getRuntimeSchedule(
2954  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2955  assert(Ordered ||
2956  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2957  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2958  Schedule != OMP_sch_static_balanced_chunked));
2959  // Call __kmpc_dispatch_init(
2960  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2961  // kmp_int[32|64] lower, kmp_int[32|64] upper,
2962  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2963 
2964  // If the chunk was not specified in the clause, use the default value 1.
2965  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2966  : CGF.Builder.getIntN(IVSize, 1);
2967  llvm::Value *Args[] = {
2968  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2969  CGF.Builder.getInt32(addMonoNonMonoModifier(
2970  Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2971  DispatchValues.LB, // Lower
2972  DispatchValues.UB, // Upper
2973  CGF.Builder.getIntN(IVSize, 1), // Stride
2974  Chunk // Chunk
2975  };
2976  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2977 }
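// Together with __kmpc_dispatch_next (see emitForNext below), the generated
// dynamic-scheduling loop has roughly this shape (illustrative sketch only):
//   __kmpc_dispatch_init_4(loc, tid, schedule, lb, ub, /*stride=*/1, chunk);
//   while (__kmpc_dispatch_next_4(loc, tid, &last, &lo, &hi, &st))
//     for (i = lo; i <= hi; ++i) body(i);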
2978 
2979 static void emitForStaticInitCall(
2980     CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2981     llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
2982     OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2983     const CGOpenMPRuntime::StaticRTInput &Values) {
2984  if (!CGF.HaveInsertPoint())
2985  return;
2986 
2987  assert(!Values.Ordered);
2988  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2989  Schedule == OMP_sch_static_balanced_chunked ||
2990  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2991  Schedule == OMP_dist_sch_static ||
2992  Schedule == OMP_dist_sch_static_chunked);
2993 
2994  // Call __kmpc_for_static_init(
2995  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2996  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2997  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2998  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2999  llvm::Value *Chunk = Values.Chunk;
3000  if (Chunk == nullptr) {
3001  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3002  Schedule == OMP_dist_sch_static) &&
3003  "expected static non-chunked schedule");
3004  // If the Chunk was not specified in the clause - use default value 1.
3005  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3006  } else {
3007  assert((Schedule == OMP_sch_static_chunked ||
3008  Schedule == OMP_sch_static_balanced_chunked ||
3009  Schedule == OMP_ord_static_chunked ||
3010  Schedule == OMP_dist_sch_static_chunked) &&
3011  "expected static chunked schedule");
3012  }
3013  llvm::Value *Args[] = {
3014  UpdateLocation,
3015  ThreadId,
3016  CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3017  M2)), // Schedule type
3018  Values.IL.getPointer(), // &isLastIter
3019  Values.LB.getPointer(), // &LB
3020  Values.UB.getPointer(), // &UB
3021  Values.ST.getPointer(), // &Stride
3022  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3023  Chunk // Chunk
3024  };
3025  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3026 }
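// Unlike the dispatch_init/dispatch_next protocol used for dynamic schedules,
// a single __kmpc_for_static_init call fills in *p_lower/*p_upper/*p_stride
// with the bounds this thread executes; the matching __kmpc_for_static_fini is
// emitted by emitForStaticFinish below.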
3027 
3028 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3029                                         SourceLocation Loc,
3030  OpenMPDirectiveKind DKind,
3031  const OpenMPScheduleTy &ScheduleKind,
3032  const StaticRTInput &Values) {
3033  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3034  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3035  assert(isOpenMPWorksharingDirective(DKind) &&
3036  "Expected loop-based or sections-based directive.");
3037  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3038  isOpenMPLoopDirective(DKind)
3039  ? OMP_IDENT_WORK_LOOP
3040  : OMP_IDENT_WORK_SECTIONS);
3041  auto *ThreadId = getThreadID(CGF, Loc);
3042  auto *StaticInitFunction =
3043      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3044  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3045  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3046 }
3047 
3048 void CGOpenMPRuntime::emitDistributeStaticInit(
3049     CodeGenFunction &CGF, SourceLocation Loc,
3050  OpenMPDistScheduleClauseKind SchedKind,
3051  const CGOpenMPRuntime::StaticRTInput &Values) {
3052  OpenMPSchedType ScheduleNum =
3053  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3054  auto *UpdatedLocation =
3055  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3056  auto *ThreadId = getThreadID(CGF, Loc);
3057  auto *StaticInitFunction =
3058  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3059  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3060                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3061                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
3062 }
3063 
3064 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3065                                           SourceLocation Loc,
3066  OpenMPDirectiveKind DKind) {
3067  if (!CGF.HaveInsertPoint())
3068  return;
3069  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3070  llvm::Value *Args[] = {
3071      emitUpdateLocation(CGF, Loc,
3072                         isOpenMPDistributeDirective(DKind)
3073                             ? OMP_IDENT_WORK_DISTRIBUTE
3074  : isOpenMPLoopDirective(DKind)
3075  ? OMP_IDENT_WORK_LOOP
3076  : OMP_IDENT_WORK_SECTIONS),
3077  getThreadID(CGF, Loc)};
3078  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3079                      Args);
3080 }
3081 
3082 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3083                                                  SourceLocation Loc,
3084  unsigned IVSize,
3085  bool IVSigned) {
3086  if (!CGF.HaveInsertPoint())
3087  return;
3088  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3089  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3090  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3091 }
3092 
3093 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3094                                           SourceLocation Loc, unsigned IVSize,
3095  bool IVSigned, Address IL,
3096  Address LB, Address UB,
3097  Address ST) {
3098  // Call __kmpc_dispatch_next(
3099  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3100  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3101  // kmp_int[32|64] *p_stride);
3102  llvm::Value *Args[] = {
3103  emitUpdateLocation(CGF, Loc),
3104  getThreadID(CGF, Loc),
3105  IL.getPointer(), // &isLastIter
3106  LB.getPointer(), // &Lower
3107  UB.getPointer(), // &Upper
3108  ST.getPointer() // &Stride
3109  };
3110  llvm::Value *Call =
3111  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3112  return CGF.EmitScalarConversion(
3113  Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
3114  CGF.getContext().BoolTy, Loc);
3115 }
3116 
3117 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3118                                            llvm::Value *NumThreads,
3119  SourceLocation Loc) {
3120  if (!CGF.HaveInsertPoint())
3121  return;
3122  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3123  llvm::Value *Args[] = {
3124  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3125  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3126  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3127                      Args);
3128 }
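// E.g. '#pragma omp parallel num_threads(8)' lowers to
// __kmpc_push_num_threads(&loc, gtid, 8) ahead of the parallel region.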
3129 
3130 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3131                                          OpenMPProcBindClauseKind ProcBind,
3132  SourceLocation Loc) {
3133  if (!CGF.HaveInsertPoint())
3134  return;
3135  // Constants for proc bind value accepted by the runtime.
3136  enum ProcBindTy {
3137  ProcBindFalse = 0,
3138  ProcBindTrue,
3139  ProcBindMaster,
3140  ProcBindClose,
3141  ProcBindSpread,
3142  ProcBindIntel,
3143  ProcBindDefault
3144  } RuntimeProcBind;
3145  switch (ProcBind) {
3146  case OMPC_PROC_BIND_master:
3147  RuntimeProcBind = ProcBindMaster;
3148  break;
3149  case OMPC_PROC_BIND_close:
3150  RuntimeProcBind = ProcBindClose;
3151  break;
3152  case OMPC_PROC_BIND_spread:
3153  RuntimeProcBind = ProcBindSpread;
3154  break;
3155  case OMPC_PROC_BIND_unknown:
3156  llvm_unreachable("Unsupported proc_bind value.");
3157  }
3158  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3159  llvm::Value *Args[] = {
3160  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3161  llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3162  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3163 }
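// E.g. 'proc_bind(spread)' lowers to __kmpc_push_proc_bind(&loc, gtid,
// /*ProcBindSpread=*/4) ahead of the parallel region.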
3164 
3165 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3166                                 SourceLocation Loc) {
3167  if (!CGF.HaveInsertPoint())
3168  return;
3169  // Build call void __kmpc_flush(ident_t *loc)
3170  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3171                      emitUpdateLocation(CGF, Loc));
3172 }
3173 
3174 namespace {
3175 /// \brief Indexes of fields for type kmp_task_t.
3176 enum KmpTaskTFields {
3177  /// \brief List of shared variables.
3178  KmpTaskTShareds,
3179  /// \brief Task routine.
3180  KmpTaskTRoutine,
3181  /// \brief Partition id for the untied tasks.
3182  KmpTaskTPartId,
3183  /// Function with call of destructors for private variables.
3184  Data1,
3185  /// Task priority.
3186  Data2,
3187  /// (Taskloops only) Lower bound.
3188  KmpTaskTLowerBound,
3189  /// (Taskloops only) Upper bound.
3190  KmpTaskTUpperBound,
3191  /// (Taskloops only) Stride.
3192  KmpTaskTStride,
3193  /// (Taskloops only) Is last iteration flag.
3194  KmpTaskTLastIter,
3195  /// (Taskloops only) Reduction data.
3196  KmpTaskTReductions,
3197 };
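// These indexes must stay in sync with the field order laid down by
// createKmpTaskTRecordDecl below: shareds, routine, part_id, data1, data2,
// then the taskloop-only fields.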
3198 } // anonymous namespace
3199 
3200 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3201  // FIXME: Add other entries type when they become supported.
3202  return OffloadEntriesTargetRegion.empty();
3203 }
3204 
3205 /// \brief Initialize target region entry.
3206 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3207  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3208  StringRef ParentName, unsigned LineNum,
3209  unsigned Order) {
3210  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3211  "only required for the device "
3212  "code generation.");
3213  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3214  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3215  /*Flags=*/0);
3216  ++OffloadingEntriesNum;
3217 }
3218 
3219 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3220  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3221  StringRef ParentName, unsigned LineNum,
3222  llvm::Constant *Addr, llvm::Constant *ID,
3223  int32_t Flags) {
3224  // If we are emitting code for a target, the entry is already initialized,
3225  // only has to be registered.
3226  if (CGM.getLangOpts().OpenMPIsDevice) {
3227  assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3228  "Entry must exist.");
3229  auto &Entry =
3230  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3231  assert(Entry.isValid() && "Entry not initialized!");
3232  Entry.setAddress(Addr);
3233  Entry.setID(ID);
3234  Entry.setFlags(Flags);
3235  return;
3236  } else {
3237  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags);
3238  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3239  }
3240 }
3241 
3242 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3243  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3244  unsigned LineNum) const {
3245  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3246  if (PerDevice == OffloadEntriesTargetRegion.end())
3247  return false;
3248  auto PerFile = PerDevice->second.find(FileID);
3249  if (PerFile == PerDevice->second.end())
3250  return false;
3251  auto PerParentName = PerFile->second.find(ParentName);
3252  if (PerParentName == PerFile->second.end())
3253  return false;
3254  auto PerLine = PerParentName->second.find(LineNum);
3255  if (PerLine == PerParentName->second.end())
3256  return false;
3257  // Fail if this entry is already registered.
3258  if (PerLine->second.getAddress() || PerLine->second.getID())
3259  return false;
3260  return true;
3261 }
3262 
3263 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3264  const OffloadTargetRegionEntryInfoActTy &Action) {
3265  // Scan all target region entries and perform the provided action.
3266  for (auto &D : OffloadEntriesTargetRegion)
3267  for (auto &F : D.second)
3268  for (auto &P : F.second)
3269  for (auto &L : P.second)
3270  Action(D.first, F.first, P.first(), L.first, L.second);
3271 }
3272 
3273 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
3274 /// \a Codegen. This is used to emit the two functions that register and
3275 /// unregister the descriptor of the current compilation unit.
3276 static llvm::Function *
3277 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
3278                                           const RegionCodeGenTy &Codegen) {
3279  auto &C = CGM.getContext();
3280  FunctionArgList Args;
3281  ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3282  Args.push_back(&DummyPtr);
3283 
3284  CodeGenFunction CGF(CGM);
3285  auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3286  auto FTy = CGM.getTypes().GetFunctionType(FI);
3287  auto *Fn =
3288  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
3289  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
3290  Codegen(CGF);
3291  CGF.FinishFunction();
3292  return Fn;
3293 }
3294 
3295 llvm::Function *
3296 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3297 
3298  // If we don't have entries or if we are emitting code for the device, we
3299  // don't need to do anything.
3300  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3301  return nullptr;
3302 
3303  auto &M = CGM.getModule();
3304  auto &C = CGM.getContext();
3305 
3306  // Get list of devices we care about
3307  auto &Devices = CGM.getLangOpts().OMPTargetTriples;
3308 
3309  // We should be creating an offloading descriptor only if there are devices
3310  // specified.
3311  assert(!Devices.empty() && "No OpenMP offloading devices??");
3312 
3313  // Create the external variables that will point to the begin and end of the
3314  // host entries section. These will be defined by the linker.
3315  auto *OffloadEntryTy =
3316      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3317  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
3318  M, OffloadEntryTy, /*isConstant=*/true,
3319  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3320  ".omp_offloading.entries_begin");
3321  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
3322  M, OffloadEntryTy, /*isConstant=*/true,
3323  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3324  ".omp_offloading.entries_end");
3325 
3326  // Create all device images
3327  auto *DeviceImageTy = cast<llvm::StructType>(
3328      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3329  ConstantInitBuilder DeviceImagesBuilder(CGM);
3330  auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);
3331 
3332  for (unsigned i = 0; i < Devices.size(); ++i) {
3333  StringRef T = Devices[i].getTriple();
3334  auto *ImgBegin = new llvm::GlobalVariable(
3335  M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3336  /*Initializer=*/nullptr,
3337  Twine(".omp_offloading.img_start.") + Twine(T));
3338  auto *ImgEnd = new llvm::GlobalVariable(
3339  M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3340  /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
3341 
3342  auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
3343  Dev.add(ImgBegin);
3344  Dev.add(ImgEnd);
3345  Dev.add(HostEntriesBegin);
3346  Dev.add(HostEntriesEnd);
3347  Dev.finishAndAddTo(DeviceImagesEntries);
3348  }
3349 
3350  // Create device images global array.
3351  llvm::GlobalVariable *DeviceImages =
3352  DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
3353  CGM.getPointerAlign(),
3354  /*isConstant=*/true);
3355  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3356 
3357  // This is a Zero array to be used in the creation of the constant expressions
3358  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3359  llvm::Constant::getNullValue(CGM.Int32Ty)};
3360 
3361  // Create the target region descriptor.
3362  auto *BinaryDescriptorTy = cast<llvm::StructType>(
3363      CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
3364  ConstantInitBuilder DescBuilder(CGM);
3365  auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
3366  DescInit.addInt(CGM.Int32Ty, Devices.size());
3367  DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3368  DeviceImages,
3369  Index));
3370  DescInit.add(HostEntriesBegin);
3371  DescInit.add(HostEntriesEnd);
3372 
3373  auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
3374  CGM.getPointerAlign(),
3375  /*isConstant=*/true);
3376 
3377  // Emit code to register or unregister the descriptor at execution
3378  // startup or closing, respectively.
3379 
3380  // Create a variable to drive the registration and unregistration of the
3381  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3382  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
3383  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
3384  IdentInfo, C.CharTy, ImplicitParamDecl::Other);
3385 
3387  CGM, ".omp_offloading.descriptor_unreg",
3388  [&](CodeGenFunction &CGF, PrePostActionTy &) {
3390  Desc);
3391  });
3393  CGM, ".omp_offloading.descriptor_reg",
3394  [&](CodeGenFunction &CGF, PrePostActionTy &) {
3396  Desc);
3397  CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3398  });
3399  if (CGM.supportsCOMDAT()) {
3400  // It is sufficient to call registration function only once, so create a
3401  // COMDAT group for registration/unregistration functions and associated
3402  // data. That would reduce startup time and code size. Registration
3403  // function serves as a COMDAT group key.
3404  auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
3405  RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3406  RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3407  RegFn->setComdat(ComdatKey);
3408  UnRegFn->setComdat(ComdatKey);
3409  DeviceImages->setComdat(ComdatKey);
3410  Desc->setComdat(ComdatKey);
3411  }
3412  return RegFn;
3413 }
3414 
3415 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
3416  llvm::Constant *Addr, uint64_t Size,
3417  int32_t Flags) {
3418  StringRef Name = Addr->getName();
3419  auto *TgtOffloadEntryType = cast<llvm::StructType>(
3420      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
3421  llvm::LLVMContext &C = CGM.getModule().getContext();
3422  llvm::Module &M = CGM.getModule();
3423 
3424  // Make sure the address has the right type.
3425  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
3426 
3427  // Create constant string with the name.
3428  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3429 
3430  llvm::GlobalVariable *Str =
3431  new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
3433  ".omp_offloading.entry_name");
3434  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3435  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
3436 
3437  // We can't have any padding between symbols, so we need to have 1-byte
3438  // alignment.
3439  auto Align = CharUnits::fromQuantity(1);
3440 
3441  // Create the entry struct.
3442  ConstantInitBuilder EntryBuilder(CGM);
3443  auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
3444  EntryInit.add(AddrPtr);
3445  EntryInit.add(StrPtr);
3446  EntryInit.addInt(CGM.SizeTy, Size);
3447  EntryInit.addInt(CGM.Int32Ty, Flags);
3448  EntryInit.addInt(CGM.Int32Ty, 0);
3449  llvm::GlobalVariable *Entry =
3450  EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
3451  Align,
3452  /*constant*/ true,
3454 
3455  // The entry has to be created in the section the linker expects it to be.
3456  Entry->setSection(".omp_offloading.entries");
3457 }
3458 
3459 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3460  // Emit the offloading entries and metadata so that the device codegen side
3461  // can easily figure out what to emit. The produced metadata looks like
3462  // this:
3463  //
3464  // !omp_offload.info = !{!1, ...}
3465  //
3466  // Right now we only generate metadata for functions that contain target
3467  // regions.
3468 
3469  // If we do not have entries, we don't need to do anything.
3470  if (OffloadEntriesInfoManager.empty())
3471  return;
3472 
3473  llvm::Module &M = CGM.getModule();
3474  llvm::LLVMContext &C = M.getContext();
3475  SmallVector<OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
3476      OrderedEntries(OffloadEntriesInfoManager.size());
3477 
3478  // Create the offloading info metadata node.
3479  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3480 
3481  // Auxiliary methods to create metadata values and strings.
3482  auto getMDInt = [&](unsigned v) {
3483  return llvm::ConstantAsMetadata::get(
3484  llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
3485  };
3486 
3487  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
3488 
3489  // Create function that emits metadata for each target region entry;
3490  auto &&TargetRegionMetadataEmitter = [&](
3491  unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
3494  // Generate metadata for target regions. Each entry of this metadata
3495  // contains:
3496  // - Entry 0 -> Kind of this type of metadata (0).
3497  // - Entry 1 -> Device ID of the file where the entry was identified.
3498  // - Entry 2 -> File ID of the file where the entry was identified.
3499  // - Entry 3 -> Mangled name of the function where the entry was identified.
3500  // - Entry 4 -> Line in the file where the entry was identified.
3501  // - Entry 5 -> Order the entry was created.
3502  // The first element of the metadata node is the kind.
3503  Ops.push_back(getMDInt(E.getKind()));
3504  Ops.push_back(getMDInt(DeviceID));
3505  Ops.push_back(getMDInt(FileID));
3506  Ops.push_back(getMDString(ParentName));
3507  Ops.push_back(getMDInt(Line));
3508  Ops.push_back(getMDInt(E.getOrder()));
3509 
3510  // Save this entry in the right position of the ordered entries array.
3511  OrderedEntries[E.getOrder()] = &E;
3512 
3513  // Add metadata to the named metadata node.
3514  MD->addOperand(llvm::MDNode::get(C, Ops));
3515  };
3516 
3517  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3518      TargetRegionMetadataEmitter);
3519 
3520  for (auto *E : OrderedEntries) {
3521  assert(E && "All ordered entries must exist!");
3522  if (auto *CE =
3523  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3524  E)) {
3525  assert(CE->getID() && CE->getAddress() &&
3526  "Entry ID and Addr are invalid!");
3527  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
3528  } else
3529  llvm_unreachable("Unsupported entry kind.");
3530  }
3531 }
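// A single target region therefore yields one node of the named metadata,
// e.g. (with illustrative values):
//   !omp_offload.info = !{!0}
//   !0 = !{i32 0, i32 2049, i32 123456, !"_Z3foov", i32 42, i32 0}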
3532 
3533 /// \brief Loads all the offload entries information from the host IR
3534 /// metadata.
3535 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3536  // If we are in target mode, load the metadata from the host IR. This code has
3537  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3538 
3539  if (!CGM.getLangOpts().OpenMPIsDevice)
3540  return;
3541 
3542  if (CGM.getLangOpts().OMPHostIRFile.empty())
3543  return;
3544 
3545  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3546  if (Buf.getError())
3547  return;
3548 
3549  llvm::LLVMContext C;
3550  auto ME = expectedToErrorOrAndEmitErrors(
3551  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3552 
3553  if (ME.getError())
3554  return;
3555 
3556  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3557  if (!MD)
3558  return;
3559 
3560  for (auto I : MD->operands()) {
3561  llvm::MDNode *MN = cast<llvm::MDNode>(I);
3562 
3563  auto getMDInt = [&](unsigned Idx) {
3564  llvm::ConstantAsMetadata *V =
3565  cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3566  return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3567  };
3568 
3569  auto getMDString = [&](unsigned Idx) {
3570  llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3571  return V->getString();
3572  };
3573 
3574  switch (getMDInt(0)) {
3575  default:
3576  llvm_unreachable("Unexpected metadata!");
3577  break;
3581  /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
3582  /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
3583  /*Order=*/getMDInt(5));
3584  break;
3585  }
3586  }
3587 }
3588 
3589 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3590  if (!KmpRoutineEntryPtrTy) {
3591  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3592  auto &C = CGM.getContext();
3593  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3594  FunctionProtoType::ExtProtoInfo EPI;
3595  KmpRoutineEntryPtrQTy = C.getPointerType(
3596  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3597  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3598  }
3599 }
3600 
3601 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
3602                                        QualType FieldTy) {
3603  auto *Field = FieldDecl::Create(
3604  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3606  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3607  Field->setAccess(AS_public);
3608  DC->addDecl(Field);
3609  return Field;
3610 }
3611 
3612 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3613 
3614  // Make sure the type of the entry is already created. This is the type we
3615  // have to create:
3616  // struct __tgt_offload_entry{
3617  // void *addr; // Pointer to the offload entry info.
3618  // // (function or global)
3619  // char *name; // Name of the function or global.
3620  // size_t size; // Size of the entry info (0 if it a function).
3621  // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3622  // int32_t reserved; // Reserved, to use by the runtime library.
3623  // };
3624  if (TgtOffloadEntryQTy.isNull()) {
3625  ASTContext &C = CGM.getContext();
3626  auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3627  RD->startDefinition();
3628  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3629  addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3630  addFieldToRecordDecl(C, RD, C.getSizeType());
3631  addFieldToRecordDecl(
3632      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3633  addFieldToRecordDecl(
3634      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3635  RD->completeDefinition();
3636  TgtOffloadEntryQTy = C.getRecordType(RD);
3637  }
3638  return TgtOffloadEntryQTy;
3639 }
3640 
3641 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
3642  // These are the types we need to build:
3643  // struct __tgt_device_image{
3644  // void *ImageStart; // Pointer to the target code start.
3645  // void *ImageEnd; // Pointer to the target code end.
3646  // // We also add the host entries to the device image, as it may be useful
3647  // // for the target runtime to have access to that information.
3648  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
3649  // // the entries.
3650  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
3651  // // entries (non inclusive).
3652  // };
3653  if (TgtDeviceImageQTy.isNull()) {
3654  ASTContext &C = CGM.getContext();
3655  auto *RD = C.buildImplicitRecord("__tgt_device_image");
3656  RD->startDefinition();
3657  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3658  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3659  addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3660  addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3661  RD->completeDefinition();
3662  TgtDeviceImageQTy = C.getRecordType(RD);
3663  }
3664  return TgtDeviceImageQTy;
3665 }
3666 
3667 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
3668  // struct __tgt_bin_desc{
3669  // int32_t NumDevices; // Number of devices supported.
3670  // __tgt_device_image *DeviceImages; // Arrays of device images
3671  // // (one per device).
3672  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
3673  // // entries.
3674  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
3675  // // entries (non inclusive).
3676  // };
3677  if (TgtBinaryDescriptorQTy.isNull()) {
3678  ASTContext &C = CGM.getContext();
3679  auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3680  RD->startDefinition();
3681  addFieldToRecordDecl(
3682      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3683  addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
3684  addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3685  addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
3686  RD->completeDefinition();
3687  TgtBinaryDescriptorQTy = C.getRecordType(RD);
3688  }
3689  return TgtBinaryDescriptorQTy;
3690 }
3691 
3692 namespace {
3693 struct PrivateHelpersTy {
3694  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3695  const VarDecl *PrivateElemInit)
3696  : Original(Original), PrivateCopy(PrivateCopy),
3697  PrivateElemInit(PrivateElemInit) {}
3698  const VarDecl *Original;
3699  const VarDecl *PrivateCopy;
3700  const VarDecl *PrivateElemInit;
3701 };
3702 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3703 } // anonymous namespace
3704 
3705 static RecordDecl *
3706 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3707  if (!Privates.empty()) {
3708  auto &C = CGM.getContext();
3709  // Build struct .kmp_privates_t. {
3710  // /* private vars */
3711  // };
3712  auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3713  RD->startDefinition();
3714  for (auto &&Pair : Privates) {
3715  auto *VD = Pair.second.Original;
3716  auto Type = VD->getType();
3717  Type = Type.getNonReferenceType();
3718  auto *FD = addFieldToRecordDecl(C, RD, Type);
3719  if (VD->hasAttrs()) {
3720  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3721  E(VD->getAttrs().end());
3722  I != E; ++I)
3723  FD->addAttr(*I);
3724  }
3725  }
3726  RD->completeDefinition();
3727  return RD;
3728  }
3729  return nullptr;
3730 }
3731 
3732 static RecordDecl *
3733 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3734                          QualType KmpInt32Ty,
3735  QualType KmpRoutineEntryPointerQTy) {
3736  auto &C = CGM.getContext();
3737  // Build struct kmp_task_t {
3738  // void * shareds;
3739  // kmp_routine_entry_t routine;
3740  // kmp_int32 part_id;
3741  // kmp_cmplrdata_t data1;
3742  // kmp_cmplrdata_t data2;
3743  // For taskloops additional fields:
3744  // kmp_uint64 lb;
3745  // kmp_uint64 ub;
3746  // kmp_int64 st;
3747  // kmp_int32 liter;
3748  // void * reductions;
3749  // };
3750  auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3751  UD->startDefinition();
3752  addFieldToRecordDecl(C, UD, KmpInt32Ty);
3753  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3754  UD->completeDefinition();
3755  QualType KmpCmplrdataTy = C.getRecordType(UD);
3756  auto *RD = C.buildImplicitRecord("kmp_task_t");
3757  RD->startDefinition();
3758  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3759  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3760  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3761  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3762  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3763  if (isOpenMPTaskLoopDirective(Kind)) {
3764  QualType KmpUInt64Ty =
3765  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3766  QualType KmpInt64Ty =
3767  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3768  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3769  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3770  addFieldToRecordDecl(C, RD, KmpInt64Ty);
3771  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3772  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3773  }
3774  RD->completeDefinition();
3775  return RD;
3776 }
3777 
3778 static RecordDecl *
3779 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3780                                      ArrayRef<PrivateDataTy> Privates) {
3781  auto &C = CGM.getContext();
3782  // Build struct kmp_task_t_with_privates {
3783  // kmp_task_t task_data;
3784  // .kmp_privates_t. privates;
3785  // };
3786  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3787  RD->startDefinition();
3788  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3789  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3790  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3791  }
3792  RD->completeDefinition();
3793  return RD;
3794 }
3795 
3796 /// \brief Emit a proxy function which accepts kmp_task_t as the second
3797 /// argument.
3798 /// \code
3799 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3800 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3801 /// For taskloops:
3802 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3803 /// tt->reductions, tt->shareds);
3804 /// return 0;
3805 /// }
3806 /// \endcode
3807 static llvm::Value *
3808 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3809                       OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3810  QualType KmpTaskTWithPrivatesPtrQTy,
3811  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3812  QualType SharedsPtrTy, llvm::Value *TaskFunction,
3813  llvm::Value *TaskPrivatesMap) {
3814  auto &C = CGM.getContext();
3815  FunctionArgList Args;
3816  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3817                            ImplicitParamDecl::Other);
3818  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3819                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3820                                ImplicitParamDecl::Other);
3821  Args.push_back(&GtidArg);
3822  Args.push_back(&TaskTypeArg);
3823  auto &TaskEntryFnInfo =
3824  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3825  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3826  auto *TaskEntry =
3828  ".omp_task_entry.", &CGM.getModule());
3829  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
3830  CodeGenFunction CGF(CGM);
3831  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
3832 
3833  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3834  // tt,
3835  // For taskloops:
3836  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3837  // tt->task_data.shareds);
3838  auto *GtidParam = CGF.EmitLoadOfScalar(
3839  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3840  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3841  CGF.GetAddrOfLocalVar(&TaskTypeArg),
3842  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3843  auto *KmpTaskTWithPrivatesQTyRD =
3844  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3845  LValue Base =
3846  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3847  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3848  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3849  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3850  auto *PartidParam = PartIdLVal.getPointer();
3851 
3852  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3853  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3854  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3855  CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3856  CGF.ConvertTypeForMem(SharedsPtrTy));
3857 
3858  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3859  llvm::Value *PrivatesParam;
3860  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3861  auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3862  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3863  PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3864  } else
3865  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3866 
3867  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3868  TaskPrivatesMap,
3869  CGF.Builder
3871  TDBase.getAddress(), CGF.VoidPtrTy)
3872  .getPointer()};
3873  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3874  std::end(CommonArgs));
3875  if (isOpenMPTaskLoopDirective(Kind)) {
3876  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3877  auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3878  auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
3879  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3880  auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3881  auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
3882  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3883  auto StLVal = CGF.EmitLValueForField(Base, *StFI);
3884  auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
3885  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3886  auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
3887  auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
3888  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3889  auto RLVal = CGF.EmitLValueForField(Base, *RFI);
3890  auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
3891  CallArgs.push_back(LBParam);
3892  CallArgs.push_back(UBParam);
3893  CallArgs.push_back(StParam);
3894  CallArgs.push_back(LIParam);
3895  CallArgs.push_back(RParam);
3896  }
3897  CallArgs.push_back(SharedsParam);
3898 
3899  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3900  CallArgs);
3901  CGF.EmitStoreThroughLValue(
3902      RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3903  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3904  CGF.FinishFunction();
3905  return TaskEntry;
3906 }
3907 
3908 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3909                                             SourceLocation Loc,
3910  QualType KmpInt32Ty,
3911  QualType KmpTaskTWithPrivatesPtrQTy,
3912  QualType KmpTaskTWithPrivatesQTy) {
3913  auto &C = CGM.getContext();
3914  FunctionArgList Args;
3915  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3916                            ImplicitParamDecl::Other);
3917  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3918                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3919                                ImplicitParamDecl::Other);
3920  Args.push_back(&GtidArg);
3921  Args.push_back(&TaskTypeArg);
3922  FunctionType::ExtInfo Info;
3923  auto &DestructorFnInfo =
3924  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3925  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3926  auto *DestructorFn =
3928  ".omp_task_destructor.", &CGM.getModule());
3929  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3930  DestructorFnInfo);
3931  CodeGenFunction CGF(CGM);
3932  CGF.disableDebugInfo();
3933  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3934  Args);
3935 
3936  LValue Base = CGF.EmitLoadOfPointerLValue(
3937      CGF.GetAddrOfLocalVar(&TaskTypeArg),
3938  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3939  auto *KmpTaskTWithPrivatesQTyRD =
3940  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3941  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3942  Base = CGF.EmitLValueForField(Base, *FI);
3943  for (auto *Field :
3944  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3945  if (auto DtorKind = Field->getType().isDestructedType()) {
3946  auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3947  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3948  }
3949  }
3950  CGF.FinishFunction();
3951  return DestructorFn;
3952 }
3953 
3954 /// \brief Emit a privates mapping function for correct handling of private and
3955 /// firstprivate variables.
3956 /// \code
3957 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3958 /// **noalias priv1,..., <tyn> **noalias privn) {
3959 /// *priv1 = &.privates.priv1;
3960 /// ...;
3961 /// *privn = &.privates.privn;
3962 /// }
3963 /// \endcode
3964 static llvm::Value *
3965 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3966                                ArrayRef<const Expr *> PrivateVars,
3967  ArrayRef<const Expr *> FirstprivateVars,
3968  ArrayRef<const Expr *> LastprivateVars,
3969  QualType PrivatesQTy,
3970  ArrayRef<PrivateDataTy> Privates) {
3971  auto &C = CGM.getContext();
3972  FunctionArgList Args;
3973  ImplicitParamDecl TaskPrivatesArg(
3974  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3975  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3976      ImplicitParamDecl::Other);
3977  Args.push_back(&TaskPrivatesArg);
3978  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3979  unsigned Counter = 1;
3980  for (auto *E: PrivateVars) {
3981  Args.push_back(ImplicitParamDecl::Create(
3982  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3983  C.getPointerType(C.getPointerType(E->getType()))
3984  .withConst()
3985  .withRestrict(),
3986      ImplicitParamDecl::Other));
3987  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3988  PrivateVarsPos[VD] = Counter;
3989  ++Counter;
3990  }
3991  for (auto *E : FirstprivateVars) {
3992  Args.push_back(ImplicitParamDecl::Create(
3993  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3994  C.getPointerType(C.getPointerType(E->getType()))
3995  .withConst()
3996  .withRestrict(),
3997      ImplicitParamDecl::Other));
3998  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3999  PrivateVarsPos[VD] = Counter;
4000  ++Counter;
4001  }
4002  for (auto *E: LastprivateVars) {
4003  Args.push_back(ImplicitParamDecl::Create(
4004  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4005  C.getPointerType(C.getPointerType(E->getType()))
4006  .withConst()
4007  .withRestrict(),
4008      ImplicitParamDecl::Other));
4009  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4010  PrivateVarsPos[VD] = Counter;
4011  ++Counter;
4012  }
4013  auto &TaskPrivatesMapFnInfo =
4014  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4015  auto *TaskPrivatesMapTy =
4016  CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4017  auto *TaskPrivatesMap = llvm::Function::Create(
4018  TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
4019  ".omp_task_privates_map.", &CGM.getModule());
4020  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
4021  TaskPrivatesMapFnInfo);
4022  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4023  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4024  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4025  CodeGenFunction CGF(CGM);
4026  CGF.disableDebugInfo();
4027  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4028  TaskPrivatesMapFnInfo, Args);
4029 
4030  // *privi = &.privates.privi;
4031  LValue Base = CGF.EmitLoadOfPointerLValue(
4032      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4033  TaskPrivatesArg.getType()->castAs<PointerType>());
4034  auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4035  Counter = 0;
4036  for (auto *Field : PrivatesQTyRD->fields()) {
4037  auto FieldLVal = CGF.EmitLValueForField(Base, Field);
4038  auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4039  auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4040  auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4041  RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4042  CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4043  ++Counter;
4044  }
4045  CGF.FinishFunction();
4046  return TaskPrivatesMap;
4047 }
4048 
4049 static int array_pod_sort_comparator(const PrivateDataTy *P1,
4050  const PrivateDataTy *P2) {
4051  return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
4052 }
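// Note the inverted comparison: privates are sorted by descending alignment so
// that the most strictly aligned copies come first in the .kmp_privates.t
// record, minimizing padding.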
4053 
4054 /// Emit initialization for private variables in task-based directives.
4055 static void emitPrivatesInit(CodeGenFunction &CGF,
4056                              const OMPExecutableDirective &D,
4057  Address KmpTaskSharedsPtr, LValue TDBase,
4058  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4059  QualType SharedsTy, QualType SharedsPtrTy,
4060  const OMPTaskDataTy &Data,
4061  ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4062  auto &C = CGF.getContext();
4063  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4064  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4065  LValue SrcBase;
4066  if (!Data.FirstprivateVars.empty()) {
4067  SrcBase = CGF.MakeAddrLValue(
4068      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4069          KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4070      SharedsTy);
4071  }
4072  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(
4073      cast<CapturedStmt>(*D.getAssociatedStmt()));
4074  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4075  for (auto &&Pair : Privates) {
4076  auto *VD = Pair.second.PrivateCopy;
4077  auto *Init = VD->getAnyInitializer();
4078  if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4079  !CGF.isTrivialInitializer(Init)))) {
4080  LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4081  if (auto *Elem = Pair.second.PrivateElemInit) {
4082  auto *OriginalVD = Pair.second.Original;
4083  auto *SharedField = CapturesInfo.lookup(OriginalVD);
4084  auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4085  SharedRefLValue = CGF.MakeAddrLValue(
4086  Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4087  SharedRefLValue.getType(),
4089  SharedRefLValue.getBaseInfo().getMayAlias()),
4090  CGF.CGM.getTBAAAccessInfo(SharedRefLValue.getType()));
4091  QualType Type = OriginalVD->getType();
4092  if (Type->isArrayType()) {
4093  // Initialize firstprivate array.
4094  if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4095  // Perform simple memcpy.
4096  CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
4097  SharedRefLValue.getAddress(), Type);
4098  } else {
4099  // Initialize firstprivate array using element-by-element
4100  // initialization.
4101  CGF.EmitOMPAggregateAssign(
4102      PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4103  [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4104  Address SrcElement) {
4105  // Clean up any temporaries needed by the initialization.
4106  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4107  InitScope.addPrivate(
4108  Elem, [SrcElement]() -> Address { return SrcElement; });
4109  (void)InitScope.Privatize();
4110  // Emit initialization for single element.
4111  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4112      CGF, &CapturesInfo);
4113  CGF.EmitAnyExprToMem(Init, DestElement,
4114  Init->getType().getQualifiers(),
4115  /*IsInitializer=*/false);
4116  });
4117  }
4118  } else {
4119  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4120  InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4121  return SharedRefLValue.getAddress();
4122  });
4123  (void)InitScope.Privatize();
4124  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4125  CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4126  /*capturedByInit=*/false);
4127  }
4128  } else
4129  CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4130  }
4131  ++FI;
4132  }
4133 }
4134 
4135 /// Check if duplication function is required for taskloops.
4136 static bool checkInitIsRequired(CodeGenFunction &CGF,
4137                                 ArrayRef<PrivateDataTy> Privates) {
4138  bool InitRequired = false;
4139  for (auto &&Pair : Privates) {
4140  auto *VD = Pair.second.PrivateCopy;
4141  auto *Init = VD->getAnyInitializer();
4142  InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4143  !CGF.isTrivialInitializer(Init));
4144  }
4145  return InitRequired;
4146 }
4147 
4148 
4149 /// Emit task_dup function (for initialization of
4150 /// private/firstprivate/lastprivate vars and last_iter flag)
4151 /// \code
4152 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4153 /// lastpriv) {
4154 /// // setup lastprivate flag
4155 /// task_dst->last = lastpriv;
4156 /// // could be constructor calls here...
4157 /// }
4158 /// \endcode
4159 static llvm::Value *
4160 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4161                     const OMPExecutableDirective &D,
4162  QualType KmpTaskTWithPrivatesPtrQTy,
4163  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4164  const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4165  QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4166  ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4167  auto &C = CGM.getContext();
4168  FunctionArgList Args;
4169  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4170                           KmpTaskTWithPrivatesPtrQTy,
4171                           ImplicitParamDecl::Other);
4172  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4173                           KmpTaskTWithPrivatesPtrQTy,
4174                           ImplicitParamDecl::Other);
4175  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4176                                ImplicitParamDecl::Other);
4177  Args.push_back(&DstArg);
4178  Args.push_back(&SrcArg);
4179  Args.push_back(&LastprivArg);
4180  auto &TaskDupFnInfo =
4181  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4182  auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4183  auto *TaskDup =
4185  ".omp_task_dup.", &CGM.getModule());
4186  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
4187  CodeGenFunction CGF(CGM);
4188  CGF.disableDebugInfo();
4189  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
4190 
4191  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4192  CGF.GetAddrOfLocalVar(&DstArg),
4193  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4194  // task_dst->liter = lastpriv;
4195  if (WithLastIter) {
4196  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4197  LValue Base = CGF.EmitLValueForField(
4198      TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4199  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4200  llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4201  CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4202  CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4203  }
4204 
4205  // Emit initial values for private copies (if any).
4206  assert(!Privates.empty());
4207  Address KmpTaskSharedsPtr = Address::invalid();
4208  if (!Data.FirstprivateVars.empty()) {
4209  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4210  CGF.GetAddrOfLocalVar(&SrcArg),
4211  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4212  LValue Base = CGF.EmitLValueForField(
4213      TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4214  KmpTaskSharedsPtr = Address(
4215      CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4216                               Base, *std::next(KmpTaskTQTyRD->field_begin(),
4217  KmpTaskTShareds)),
4218  Loc),
4219  CGF.getNaturalTypeAlignment(SharedsTy));
4220  }
4221  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4222  SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4223  CGF.FinishFunction();
4224  return TaskDup;
4225 }
4226 
4227 /// Checks if destructor function is required to be generated.
4228 /// \return true if cleanups are required, false otherwise.
4229 static bool
4230 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4231  bool NeedsCleanup = false;
4232  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4233  auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4234  for (auto *FD : PrivateRD->fields()) {
4235  NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4236  if (NeedsCleanup)
4237  break;
4238  }
4239  return NeedsCleanup;
4240 }
4241 
4242 CGOpenMPRuntime::TaskResultTy
4243 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4244                               const OMPExecutableDirective &D,
4245  llvm::Value *TaskFunction, QualType SharedsTy,
4246  Address Shareds, const OMPTaskDataTy &Data) {
4247  auto &C = CGM.getContext();
4248  llvm::SmallVector<PrivateDataTy, 4> Privates;
4249  // Aggregate privates and sort them by the alignment.
4250  auto I = Data.PrivateCopies.begin();
4251  for (auto *E : Data.PrivateVars) {
4252  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4253  Privates.push_back(std::make_pair(
4254  C.getDeclAlign(VD),
4255  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4256  /*PrivateElemInit=*/nullptr)));
4257  ++I;
4258  }
4259  I = Data.FirstprivateCopies.begin();
4260  auto IElemInitRef = Data.FirstprivateInits.begin();
4261  for (auto *E : Data.FirstprivateVars) {
4262  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4263  Privates.push_back(std::make_pair(
4264  C.getDeclAlign(VD),
4265  PrivateHelpersTy(
4266  VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4267  cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
4268  ++I;
4269  ++IElemInitRef;
4270  }
4271  I = Data.LastprivateCopies.begin();
4272  for (auto *E : Data.LastprivateVars) {
4273  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4274  Privates.push_back(std::make_pair(
4275  C.getDeclAlign(VD),
4276  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4277  /*PrivateElemInit=*/nullptr)));
4278  ++I;
4279  }
4280  llvm::array_pod_sort(Privates.begin(), Privates.end(),
4281                       array_pod_sort_comparator);
4282  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4283  // Build type kmp_routine_entry_t (if not built yet).
4284  emitKmpRoutineEntryT(KmpInt32Ty);
4285  // Build type kmp_task_t (if not built yet).
4286  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4287  if (SavedKmpTaskloopTQTy.isNull()) {
4288  SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4289      CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4290  }
4291  KmpTaskTQTy = SavedKmpTaskloopTQTy;
4292  } else {
4293  assert(D.getDirectiveKind() == OMPD_task &&
4294  "Expected taskloop or task directive");
4295  if (SavedKmpTaskTQTy.isNull()) {
4296  SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4297  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4298  }
4299  KmpTaskTQTy = SavedKmpTaskTQTy;
4300  }
4301  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4302  // Build particular struct kmp_task_t for the given task.
4303  auto *KmpTaskTWithPrivatesQTyRD =
4305  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4306  QualType KmpTaskTWithPrivatesPtrQTy =
4307  C.getPointerType(KmpTaskTWithPrivatesQTy);
4308  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4309  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
4310  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4311  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4312 
4313  // Emit initial values for private copies (if any).
4314  llvm::Value *TaskPrivatesMap = nullptr;
4315  auto *TaskPrivatesMapTy =
4316  std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
4317  if (!Privates.empty()) {
4318  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4319  TaskPrivatesMap = emitTaskPrivateMappingFunction(
4320  CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4321  FI->getType(), Privates);
4322  TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4323  TaskPrivatesMap, TaskPrivatesMapTy);
4324  } else {
4325  TaskPrivatesMap = llvm::ConstantPointerNull::get(
4326  cast<llvm::PointerType>(TaskPrivatesMapTy));
4327  }
4328  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4329  // kmp_task_t *tt);
4330  auto *TaskEntry = emitProxyTaskFunction(
4331  CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4332  KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4333  TaskPrivatesMap);
4334 
4335  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4336  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4337  // kmp_routine_entry_t *task_entry);
4338  // Task flags. Format is taken from
4339  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
4340  // description of kmp_tasking_flags struct.
4341  enum {
4342  TiedFlag = 0x1,
4343  FinalFlag = 0x2,
4344  DestructorsFlag = 0x8,
4345  PriorityFlag = 0x20
4346  };
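// For example, a tied task with a 'priority' clause and no destructors ends up
// with Flags == TiedFlag | PriorityFlag (0x21) before the 'final' bit is OR'ed
// in below.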
4347  unsigned Flags = Data.Tied ? TiedFlag : 0;
4348  bool NeedsCleanup = false;
4349  if (!Privates.empty()) {
4350  NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4351  if (NeedsCleanup)
4352  Flags = Flags | DestructorsFlag;
4353  }
4354  if (Data.Priority.getInt())
4355  Flags = Flags | PriorityFlag;
4356  auto *TaskFlags =
4357  Data.Final.getPointer()
4358  ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4359  CGF.Builder.getInt32(FinalFlag),
4360  CGF.Builder.getInt32(/*C=*/0))
4361  : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4362  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4363  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4364  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
4365  getThreadID(CGF, Loc), TaskFlags,
4366  KmpTaskTWithPrivatesTySize, SharedsSize,
4368  TaskEntry, KmpRoutineEntryPtrTy)};
4369  auto *NewTask = CGF.EmitRuntimeCall(
4370      createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
4371  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4372  NewTask, KmpTaskTWithPrivatesPtrTy);
4373  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4374  KmpTaskTWithPrivatesQTy);
4375  LValue TDBase =
4376  CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4377  // Fill the data in the resulting kmp_task_t record.
4378  // Copy shareds if there are any.
4379  Address KmpTaskSharedsPtr = Address::invalid();
4380  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4381  KmpTaskSharedsPtr =
4382      Address(CGF.EmitLoadOfScalar(
4383                  CGF.EmitLValueForField(
4384  TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4385  KmpTaskTShareds)),
4386  Loc),
4387  CGF.getNaturalTypeAlignment(SharedsTy));
4388  CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
4389  }
4390  // Emit initial values for private copies (if any).
4391  TaskResultTy Result;
4392  if (!Privates.empty()) {
4393  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4394  SharedsTy, SharedsPtrTy, Data, Privates,
4395  /*ForDup=*/false);
4396  if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4397      (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4398  Result.TaskDupFn = emitTaskDupFunction(
4399  CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4400  KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4401  /*WithLastIter=*/!Data.LastprivateVars.empty());
4402  }
4403  }
4404  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4405  enum { Priority = 0, Destructors = 1 };
4406  // Provide pointer to function with destructors for privates.
4407  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4408  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
4409  if (NeedsCleanup) {
4410  llvm::Value *DestructorFn = emitDestructorsFunction(
4411  CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4412  KmpTaskTWithPrivatesQTy);
4413  LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4414  LValue DestructorsLV = CGF.EmitLValueForField(
4415  Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4416  CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4417                            DestructorFn, KmpRoutineEntryPtrTy),
4418  DestructorsLV);
4419  }
4420  // Set priority.
4421  if (Data.Priority.getInt()) {
4422  LValue Data2LV = CGF.EmitLValueForField(
4423  TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4424  LValue PriorityLV = CGF.EmitLValueForField(
4425  Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4426  CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4427  }
4428  Result.NewTask = NewTask;
4429  Result.TaskEntry = TaskEntry;
4430  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4431  Result.TDBase = TDBase;
4432  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4433  return Result;
4434 }
4435 
4436 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4437                                    const OMPExecutableDirective &D,
4438  llvm::Value *TaskFunction,
4439  QualType SharedsTy, Address Shareds,
4440  const Expr *IfCond,
4441  const OMPTaskDataTy &Data) {
4442  if (!CGF.HaveInsertPoint())
4443  return;
4444 
4445  TaskResultTy Result =
4446  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4447  llvm::Value *NewTask = Result.NewTask;
4448  llvm::Value *TaskEntry = Result.TaskEntry;
4449  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4450  LValue TDBase = Result.TDBase;
4451  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4452  auto &C = CGM.getContext();
4453  // Process list of dependences.
4454  Address DependenciesArray = Address::invalid();
4455  unsigned NumDependencies = Data.Dependences.size();
4456  if (NumDependencies) {
4457  // Dependence kind for RTL.
4458  enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
4459  enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4460  RecordDecl *KmpDependInfoRD;
4461  QualType FlagsTy =
4462  C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4463  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4464  if (KmpDependInfoTy.isNull()) {
4465  KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4466  KmpDependInfoRD->startDefinition();
4467  addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4468  addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4469  addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4470  KmpDependInfoRD->completeDefinition();
4471  KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4472  } else
4473  KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
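  // The record built above is assumed to mirror the runtime's kmp_depend_info
  // from kmp.h (illustrative sketch, not compiled here):
  //   typedef struct kmp_depend_info {
  //     kmp_intptr_t base_addr;  // address of the dependence object
  //     size_t len;              // size of the dependence object in bytes
  //     kmp_uint8 flags;         // bit 0 = in, bit 1 = out; DepIn = 0x1, DepInOut = 0x3
  //   } kmp_depend_info_t;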
4474  CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
4475  // Define type kmp_depend_info[<Dependences.size()>];
4476  QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4477  KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
4478  ArrayType::Normal, /*IndexTypeQuals=*/0);
4479  // kmp_depend_info[<Dependences.size()>] deps;
4480  DependenciesArray =
4481  CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4482  for (unsigned i = 0; i < NumDependencies; ++i) {
4483  const Expr *E = Data.Dependences[i].second;
4484  auto Addr = CGF.EmitLValue(E);
4485  llvm::Value *Size;
4486  QualType Ty = E->getType();
4487  if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4488  LValue UpAddrLVal =
4489  CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
4490  llvm::Value *UpAddr =
4491  CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
4492  llvm::Value *LowIntPtr =
4493  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
4494  llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
4495  Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4496  } else
4497  Size = CGF.getTypeSize(Ty);
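  // For an array-section dependence such as 'depend(in : a[lb : n])' (a
  // hypothetical example), the branch above computes the section length as the
  // byte distance from the first element to one past the last, i.e. roughly
  //   Size = (char *)&a[lb + n] - (char *)&a[lb];
  // For non-section expressions the static type size is used instead.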
4498  auto Base = CGF.MakeAddrLValue(
4499  CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
4500  KmpDependInfoTy);
4501  // deps[i].base_addr = &<Dependences[i].second>;
4502  auto BaseAddrLVal = CGF.EmitLValueForField(
4503  Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4504  CGF.EmitStoreOfScalar(
4505  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
4506  BaseAddrLVal);
4507  // deps[i].len = sizeof(<Dependences[i].second>);
4508  auto LenLVal = CGF.EmitLValueForField(
4509  Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4510  CGF.EmitStoreOfScalar(Size, LenLVal);
4511  // deps[i].flags = <Dependences[i].first>;
4512  RTLDependenceKindTy DepKind;
4513  switch (Data.Dependences[i].first) {
4514  case OMPC_DEPEND_in:
4515  DepKind = DepIn;
4516  break;
4517  // Out and InOut dependencies must use the same code.
4518  case OMPC_DEPEND_out:
4519  case OMPC_DEPEND_inout:
4520  DepKind = DepInOut;
4521  break;
4522  case OMPC_DEPEND_source:
4523  case OMPC_DEPEND_sink:
4524  case OMPC_DEPEND_unknown:
4525  llvm_unreachable("Unknown task dependence type");
4526  }
4527  auto FlagsLVal = CGF.EmitLValueForField(
4528  Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4529  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4530  FlagsLVal);
4531  }
4532  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4533  CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
4534  CGF.VoidPtrTy);
4535  }
4536 
4537  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4538  // libcall.
4539  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4540  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4541  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4542  // list is not empty
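  // Conceptually, for a directive like '#pragma omp task depend(in : x)
  // depend(inout : y)' (a hypothetical example), the 'then' branch below emits
  //   __kmpc_omp_task_with_deps(&loc, gtid, new_task, /*ndeps=*/2, deps,
  //                             /*ndeps_noalias=*/0, /*noalias_dep_list=*/NULL);
  // whereas a task without a depend clause falls back to
  //   __kmpc_omp_task(&loc, gtid, new_task);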
4543  auto *ThreadID = getThreadID(CGF, Loc);
4544  auto *UpLoc = emitUpdateLocation(CGF, Loc);
4545  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4546  llvm::Value *DepTaskArgs[7];
4547  if (NumDependencies) {
4548  DepTaskArgs[0] = UpLoc;
4549  DepTaskArgs[1] = ThreadID;
4550  DepTaskArgs[2] = NewTask;
4551  DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
4552  DepTaskArgs[4] = DependenciesArray.getPointer();
4553  DepTaskArgs[5] = CGF.Builder.getInt32(0);
4554  DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4555  }
4556  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
4557  &TaskArgs,
4558  &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4559  if (!Data.Tied) {
4560  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4561  auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4562  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4563  }
4564  if (NumDependencies) {
4565  CGF.EmitRuntimeCall(
4566  createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
4567  } else {
4568  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
4569  TaskArgs);
4570  }
4571  // Check if the parent region is untied and build the return for an untied task.
4572  if (auto *Region =
4573  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4574  Region->emitUntiedSwitch(CGF);
4575  };
4576 
4577  llvm::Value *DepWaitTaskArgs[6];
4578  if (NumDependencies) {
4579  DepWaitTaskArgs[0] = UpLoc;
4580  DepWaitTaskArgs[1] = ThreadID;
4581  DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
4582  DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4583  DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4584  DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4585  }
4586  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
4587  NumDependencies, &DepWaitTaskArgs,
4588  Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4589  auto &RT = CGF.CGM.getOpenMPRuntime();
4590  CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4591  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4592  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4593  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4594  // is specified.
4595  if (NumDependencies)
4596  CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
4597  DepWaitTaskArgs);
4598  // Call proxy_task_entry(gtid, new_task);
4599  auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4600  Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4601  Action.Enter(CGF);
4602  llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4603  CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4604  OutlinedFnArgs);
4605  };
4606 
4607  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4608  // kmp_task_t *new_task);
4609  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4610  // kmp_task_t *new_task);
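  // Taken together, the if(false) path below emits, in order (sketch):
  //   __kmpc_omp_wait_deps(&loc, gtid, ndeps, deps, 0, NULL);  // only with deps
  //   __kmpc_omp_task_begin_if0(&loc, gtid, new_task);
  //   proxy_task_entry(gtid, new_task);                        // undeferred execution
  //   __kmpc_omp_task_complete_if0(&loc, gtid, new_task);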
4611  RegionCodeGenTy RCG(CodeGen);
4612  CommonActionTy Action(
4613  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
4614  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
4615  RCG.setAction(Action);
4616  RCG(CGF);
4617  };
4618 
4619  if (IfCond)
4620  emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4621  else {
4622  RegionCodeGenTy ThenRCG(ThenCodeGen);
4623  ThenRCG(CGF);
4624  }
4625 }
4626 
4627 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4628  const OMPLoopDirective &D,
4629  llvm::Value *TaskFunction,
4630  QualType SharedsTy, Address Shareds,
4631  const Expr *IfCond,
4632  const OMPTaskDataTy &Data) {
4633  if (!CGF.HaveInsertPoint())
4634  return;
4635  TaskResultTy Result =
4636  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4637  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4638  // libcall.
4639  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4640  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4641  // sched, kmp_uint64 grainsize, void *task_dup);
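  // As a rough guide to the argument values computed below (illustrative):
  //   '#pragma omp taskloop grainsize(G)' -> sched = 1 (Grainsize), grainsize = G
  //   '#pragma omp taskloop num_tasks(N)' -> sched = 2 (NumTasks),  grainsize = N
  //   neither clause                      -> sched = 0 (NoSchedule), grainsize = 0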
4642  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4643  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4644  llvm::Value *IfVal;
4645  if (IfCond) {
4646  IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4647  /*isSigned=*/true);
4648  } else
4649  IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4650 
4651  LValue LBLVal = CGF.EmitLValueForField(
4652  Result.TDBase,
4653  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4654  auto *LBVar =
4655  cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4656  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4657  /*IsInitializer=*/true);
4658  LValue UBLVal = CGF.EmitLValueForField(
4659  Result.TDBase,
4660  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4661  auto *UBVar =
4662  cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4663  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4664  /*IsInitializer=*/true);
4665  LValue StLVal = CGF.EmitLValueForField(
4666  Result.TDBase,
4667  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4668  auto *StVar =
4669  cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4670  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4671  /*IsInitializer=*/true);
4672  // Store reductions address.
4673  LValue RedLVal = CGF.EmitLValueForField(
4674  Result.TDBase,
4675  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4676  if (Data.Reductions)
4677  CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4678  else {
4679  CGF.EmitNullInitialization(RedLVal.getAddress(),
4680  CGF.getContext().VoidPtrTy);
4681  }
4682  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4683  llvm::Value *TaskArgs[] = {
4684  UpLoc,
4685  ThreadID,
4686  Result.NewTask,
4687  IfVal,
4688  LBLVal.getPointer(),
4689  UBLVal.getPointer(),
4690  CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
4691  llvm::ConstantInt::getNullValue(
4692  CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
4693  llvm::ConstantInt::getSigned(
4694  CGF.IntTy, Data.Schedule.getPointer()
4695  ? Data.Schedule.getInt() ? NumTasks : Grainsize
4696  : NoSchedule),
4697  Data.Schedule.getPointer()
4698  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4699  /*isSigned=*/false)
4700  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4701  Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4702  Result.TaskDupFn, CGF.VoidPtrTy)
4703  : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4704  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
4705 }
4706 
4707 /// \brief Emit reduction operation for each element of array (required for
4708 /// array sections) LHS op = RHS.
4709 /// \param Type Type of array.
4710 /// \param LHSVar Variable on the left side of the reduction operation
4711 /// (references element of array in original variable).
4712 /// \param RHSVar Variable on the right side of the reduction operation
4713 /// (references element of array in original variable).
4714 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4715 /// RHSVar.
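/// For example, for 'reduction(+ : a[0:n])' (an illustrative clause) the
/// emitted loop walks LHSVar and RHSVar element by element and applies
/// RedOpGen, which here amounts to 'lhs[i] = lhs[i] + rhs[i]' for each pair.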
4716 static void EmitOMPAggregateReduction(
4717  CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4718  const VarDecl *RHSVar,
4719  const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4720  const Expr *, const Expr *)> &RedOpGen,
4721  const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4722  const Expr *UpExpr = nullptr) {
4723  // Perform element-by-element initialization.
4724  QualType ElementTy;
4725  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4726  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4727 
4728  // Drill down to the base element type on both arrays.
4729  auto ArrayTy = Type->getAsArrayTypeUnsafe();
4730  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4731 
4732  auto RHSBegin = RHSAddr.getPointer();
4733  auto LHSBegin = LHSAddr.getPointer();
4734  // Cast from pointer to array type to pointer to single element.
4735  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4736  // The basic structure here is a while-do loop.
4737  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4738  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4739  auto IsEmpty =
4740  CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4741  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4742 
4743  // Enter the loop body, making that address the current address.
4744  auto EntryBB = CGF.Builder.GetInsertBlock();
4745  CGF.EmitBlock(BodyBB);
4746 
4747  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4748 
4749  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4750  RHSBegin->getType(), 2,