clang  6.0.0svn
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/BitmaskEnum.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/CallSite.h"
25 #include "llvm/IR/DerivedTypes.h"
26 #include "llvm/IR/GlobalValue.h"
27 #include "llvm/IR/Value.h"
28 #include "llvm/Support/Format.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <cassert>
31 
32 using namespace clang;
33 using namespace CodeGen;
34 
35 namespace {
36 /// \brief Base class for handling code generation inside OpenMP regions.
37 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
38 public:
39  /// \brief Kinds of OpenMP regions used in codegen.
40  enum CGOpenMPRegionKind {
41  /// \brief Region with outlined function for standalone 'parallel'
42  /// directive.
43  ParallelOutlinedRegion,
44  /// \brief Region with outlined function for standalone 'task' directive.
45  TaskOutlinedRegion,
46  /// \brief Region for constructs that do not require function outlining,
47  /// like 'for', 'sections', 'atomic' etc. directives.
48  InlinedRegion,
49  /// \brief Region with outlined function for standalone 'target' directive.
50  TargetRegion,
51  };
52 
53  CGOpenMPRegionInfo(const CapturedStmt &CS,
54  const CGOpenMPRegionKind RegionKind,
56  bool HasCancel)
57  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
58  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
59 
60  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
62  bool HasCancel)
63  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
64  Kind(Kind), HasCancel(HasCancel) {}
65 
66  /// \brief Get a variable or parameter for storing global thread id
67  /// inside OpenMP construct.
68  virtual const VarDecl *getThreadIDVariable() const = 0;
69 
70  /// \brief Emit the captured statement body.
71  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
72 
73  /// \brief Get an LValue for the current ThreadID variable.
74  /// \return LValue for thread id variable. This LValue always has type int32*.
75  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
76 
77  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
78 
79  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
80 
81  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
82 
83  bool hasCancel() const { return HasCancel; }
84 
85  static bool classof(const CGCapturedStmtInfo *Info) {
86  return Info->getKind() == CR_OpenMP;
87  }
88 
89  ~CGOpenMPRegionInfo() override = default;
90 
91 protected:
92  CGOpenMPRegionKind RegionKind;
93  RegionCodeGenTy CodeGen;
95  bool HasCancel;
96 };
97 
98 /// \brief API for captured statement code generation in OpenMP constructs.
99 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
100 public:
101  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
102  const RegionCodeGenTy &CodeGen,
103  OpenMPDirectiveKind Kind, bool HasCancel,
104  StringRef HelperName)
105  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
106  HasCancel),
107  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
108  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
109  }
110 
111  /// \brief Get a variable or parameter for storing global thread id
112  /// inside OpenMP construct.
113  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
114 
115  /// \brief Get the name of the capture helper.
116  StringRef getHelperName() const override { return HelperName; }
117 
118  static bool classof(const CGCapturedStmtInfo *Info) {
119  return CGOpenMPRegionInfo::classof(Info) &&
120  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
121  ParallelOutlinedRegion;
122  }
123 
124 private:
125  /// \brief A variable or parameter storing global thread id for OpenMP
126  /// constructs.
127  const VarDecl *ThreadIDVar;
128  StringRef HelperName;
129 };
130 
131 /// \brief API for captured statement code generation in OpenMP constructs.
132 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
133 public:
134  class UntiedTaskActionTy final : public PrePostActionTy {
135  bool Untied;
136  const VarDecl *PartIDVar;
137  const RegionCodeGenTy UntiedCodeGen;
138  llvm::SwitchInst *UntiedSwitch = nullptr;
139 
140  public:
141  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
142  const RegionCodeGenTy &UntiedCodeGen)
143  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
144  void Enter(CodeGenFunction &CGF) override {
145  if (Untied) {
146  // Emit task switching point.
147  auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
148  CGF.GetAddrOfLocalVar(PartIDVar),
149  PartIDVar->getType()->castAs<PointerType>());
150  auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
151  auto *DoneBB = CGF.createBasicBlock(".untied.done.");
152  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153  CGF.EmitBlock(DoneBB);
155  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157  CGF.Builder.GetInsertBlock());
158  emitUntiedSwitch(CGF);
159  }
160  }
161  void emitUntiedSwitch(CodeGenFunction &CGF) const {
162  if (Untied) {
163  auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
164  CGF.GetAddrOfLocalVar(PartIDVar),
165  PartIDVar->getType()->castAs<PointerType>());
166  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167  PartIdLVal);
168  UntiedCodeGen(CGF);
169  CodeGenFunction::JumpDest CurPoint =
170  CGF.getJumpDestInCurrentScope(".untied.next.");
172  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174  CGF.Builder.GetInsertBlock());
175  CGF.EmitBranchThroughCleanup(CurPoint);
176  CGF.EmitBlock(CurPoint.getBlock());
177  }
178  }
179  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180  };
181  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182  const VarDecl *ThreadIDVar,
183  const RegionCodeGenTy &CodeGen,
184  OpenMPDirectiveKind Kind, bool HasCancel,
185  const UntiedTaskActionTy &Action)
186  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187  ThreadIDVar(ThreadIDVar), Action(Action) {
188  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189  }
190 
191  /// \brief Get a variable or parameter for storing global thread id
192  /// inside OpenMP construct.
193  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195  /// \brief Get an LValue for the current ThreadID variable.
196  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198  /// \brief Get the name of the capture helper.
199  StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201  void emitUntiedSwitch(CodeGenFunction &CGF) override {
202  Action.emitUntiedSwitch(CGF);
203  }
204 
205  static bool classof(const CGCapturedStmtInfo *Info) {
206  return CGOpenMPRegionInfo::classof(Info) &&
207  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208  TaskOutlinedRegion;
209  }
210 
211 private:
212  /// \brief A variable or parameter storing global thread id for OpenMP
213  /// constructs.
214  const VarDecl *ThreadIDVar;
215  /// Action for emitting code for untied tasks.
216  const UntiedTaskActionTy &Action;
217 };
218 
219 /// \brief API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224  const RegionCodeGenTy &CodeGen,
225  OpenMPDirectiveKind Kind, bool HasCancel)
226  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227  OldCSI(OldCSI),
228  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230  // \brief Retrieve the value of the context parameter.
231  llvm::Value *getContextValue() const override {
232  if (OuterRegionInfo)
233  return OuterRegionInfo->getContextValue();
234  llvm_unreachable("No context value for inlined OpenMP region");
235  }
236 
237  void setContextValue(llvm::Value *V) override {
238  if (OuterRegionInfo) {
239  OuterRegionInfo->setContextValue(V);
240  return;
241  }
242  llvm_unreachable("No context value for inlined OpenMP region");
243  }
244 
245  /// \brief Lookup the captured field decl for a variable.
246  const FieldDecl *lookup(const VarDecl *VD) const override {
247  if (OuterRegionInfo)
248  return OuterRegionInfo->lookup(VD);
249  // If there is no outer outlined region,no need to lookup in a list of
250  // captured variables, we can use the original one.
251  return nullptr;
252  }
253 
254  FieldDecl *getThisFieldDecl() const override {
255  if (OuterRegionInfo)
256  return OuterRegionInfo->getThisFieldDecl();
257  return nullptr;
258  }
259 
260  /// \brief Get a variable or parameter for storing global thread id
261  /// inside OpenMP construct.
262  const VarDecl *getThreadIDVariable() const override {
263  if (OuterRegionInfo)
264  return OuterRegionInfo->getThreadIDVariable();
265  return nullptr;
266  }
267 
268  /// \brief Get an LValue for the current ThreadID variable.
269  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270  if (OuterRegionInfo)
271  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272  llvm_unreachable("No LValue for inlined OpenMP construct");
273  }
274 
275  /// \brief Get the name of the capture helper.
276  StringRef getHelperName() const override {
277  if (auto *OuterRegionInfo = getOldCSI())
278  return OuterRegionInfo->getHelperName();
279  llvm_unreachable("No helper name for inlined OpenMP construct");
280  }
281 
282  void emitUntiedSwitch(CodeGenFunction &CGF) override {
283  if (OuterRegionInfo)
284  OuterRegionInfo->emitUntiedSwitch(CGF);
285  }
286 
287  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289  static bool classof(const CGCapturedStmtInfo *Info) {
290  return CGOpenMPRegionInfo::classof(Info) &&
291  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292  }
293 
294  ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297  /// \brief CodeGen info about outer OpenMP region.
299  CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// \brief API for captured statement code generation in OpenMP target
303 /// constructs. For this captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310  const RegionCodeGenTy &CodeGen, StringRef HelperName)
311  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312  /*HasCancel=*/false),
313  HelperName(HelperName) {}
314 
315  /// \brief This is unused for target regions because each starts executing
316  /// with a single thread.
317  const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319  /// \brief Get the name of the capture helper.
320  StringRef getHelperName() const override { return HelperName; }
321 
322  static bool classof(const CGCapturedStmtInfo *Info) {
323  return CGOpenMPRegionInfo::classof(Info) &&
324  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325  }
326 
327 private:
328  StringRef HelperName;
329 };
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332  llvm_unreachable("No codegen for expressions");
333 }
334 /// \brief API for generation of expressions captured in a innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340  OMPD_unknown,
341  /*HasCancel=*/false),
342  PrivScope(CGF) {
343  // Make sure the globals captured in the provided statement are local by
344  // using the privatization logic. We assume the same variable is not
345  // captured more than once.
346  for (auto &C : CS.captures()) {
347  if (!C.capturesVariable() && !C.capturesVariableByCopy())
348  continue;
349 
350  const VarDecl *VD = C.getCapturedVar();
351  if (VD->isLocalVarDeclOrParm())
352  continue;
353 
354  DeclRefExpr DRE(const_cast<VarDecl *>(VD),
355  /*RefersToEnclosingVariableOrCapture=*/false,
357  SourceLocation());
358  PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
359  return CGF.EmitLValue(&DRE).getAddress();
360  });
361  }
362  (void)PrivScope.Privatize();
363  }
364 
365  /// \brief Lookup the captured field decl for a variable.
366  const FieldDecl *lookup(const VarDecl *VD) const override {
367  if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
368  return FD;
369  return nullptr;
370  }
371 
372  /// \brief Emit the captured statement body.
373  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
374  llvm_unreachable("No body for expressions");
375  }
376 
377  /// \brief Get a variable or parameter for storing global thread id
378  /// inside OpenMP construct.
379  const VarDecl *getThreadIDVariable() const override {
380  llvm_unreachable("No thread id for expressions");
381  }
382 
383  /// \brief Get the name of the capture helper.
384  StringRef getHelperName() const override {
385  llvm_unreachable("No helper name for expressions");
386  }
387 
388  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
389 
390 private:
391  /// Private scope to capture global variables.
393 };
394 
395 /// \brief RAII for emitting code of OpenMP constructs.
396 class InlinedOpenMPRegionRAII {
397  CodeGenFunction &CGF;
398  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
399  FieldDecl *LambdaThisCaptureField = nullptr;
400 
401 public:
402  /// \brief Constructs region for combined constructs.
403  /// \param CodeGen Code generation sequence for combined directives. Includes
404  /// a list of functions used for code generation of implicitly inlined
405  /// regions.
406  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407  OpenMPDirectiveKind Kind, bool HasCancel)
408  : CGF(CGF) {
409  // Start emission for the construct.
410  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414  CGF.LambdaThisCaptureField = nullptr;
415  }
416 
417  ~InlinedOpenMPRegionRAII() {
418  // Restore original CapturedStmtInfo only if we're done with code emission.
419  auto *OldCSI =
420  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
421  delete CGF.CapturedStmtInfo;
422  CGF.CapturedStmtInfo = OldCSI;
423  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
424  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
425  }
426 };
427 
428 /// \brief Values for bit flags used in the ident_t to describe the fields.
429 /// All enumeric elements are named and described in accordance with the code
430 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
431 enum OpenMPLocationFlags : unsigned {
432  /// \brief Use trampoline for internal microtask.
433  OMP_IDENT_IMD = 0x01,
434  /// \brief Use c-style ident structure.
435  OMP_IDENT_KMPC = 0x02,
436  /// \brief Atomic reduction option for kmpc_reduce.
437  OMP_ATOMIC_REDUCE = 0x10,
438  /// \brief Explicit 'barrier' directive.
439  OMP_IDENT_BARRIER_EXPL = 0x20,
440  /// \brief Implicit barrier in code.
441  OMP_IDENT_BARRIER_IMPL = 0x40,
442  /// \brief Implicit barrier in 'for' directive.
443  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
444  /// \brief Implicit barrier in 'sections' directive.
445  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
446  /// \brief Implicit barrier in 'single' directive.
447  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
448  /// Call of __kmp_for_static_init for static loop.
449  OMP_IDENT_WORK_LOOP = 0x200,
450  /// Call of __kmp_for_static_init for sections.
451  OMP_IDENT_WORK_SECTIONS = 0x400,
452  /// Call of __kmp_for_static_init for distribute.
453  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
454  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
455 };
456 
/// \brief Describes ident structure that describes a source location.
/// All descriptions are taken from
/// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// \brief might be used in Fortran
  IdentField_Reserved_1,
  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// \brief Not really used in Fortran any more
  IdentField_Reserved_2,
  /// \brief Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// \brief String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
497 
/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// \brief Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// \brief Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// \brief dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
529 
/// Entry points into the libomp / libomptarget runtime, used as keys when
/// lazily creating the corresponding llvm::Function declarations.
enum OpenMPRTLFunction {
  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,

  //
  // Offloading related calls
  //
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
};
693 
694 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
695 /// region.
696 class CleanupTy final : public EHScopeStack::Cleanup {
697  PrePostActionTy *Action;
698 
699 public:
700  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
701  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
702  if (!CGF.HaveInsertPoint())
703  return;
704  Action->Exit(CGF);
705  }
706 };
707 
708 } // anonymous namespace
709 
712  if (PrePostAction) {
713  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
714  Callback(CodeGen, CGF, *PrePostAction);
715  } else {
716  PrePostActionTy Action;
717  Callback(CodeGen, CGF, Action);
718  }
719 }
720 
721 /// Check if the combiner is a call to UDR combiner and if it is so return the
722 /// UDR decl used for reduction.
723 static const OMPDeclareReductionDecl *
724 getReductionInit(const Expr *ReductionOp) {
725  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
726  if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
727  if (auto *DRE =
728  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
729  if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
730  return DRD;
731  return nullptr;
732 }
733 
735  const OMPDeclareReductionDecl *DRD,
736  const Expr *InitOp,
737  Address Private, Address Original,
738  QualType Ty) {
739  if (DRD->getInitializer()) {
740  std::pair<llvm::Function *, llvm::Function *> Reduction =
741  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
742  auto *CE = cast<CallExpr>(InitOp);
743  auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
744  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
745  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
746  auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
747  auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
748  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
749  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
750  [=]() -> Address { return Private; });
751  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
752  [=]() -> Address { return Original; });
753  (void)PrivateScope.Privatize();
754  RValue Func = RValue::get(Reduction.second);
755  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
756  CGF.EmitIgnoredExpr(InitOp);
757  } else {
758  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
759  auto *GV = new llvm::GlobalVariable(
760  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
761  llvm::GlobalValue::PrivateLinkage, Init, ".init");
762  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
763  RValue InitRVal;
764  switch (CGF.getEvaluationKind(Ty)) {
765  case TEK_Scalar:
766  InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
767  break;
768  case TEK_Complex:
769  InitRVal =
771  break;
772  case TEK_Aggregate:
773  InitRVal = RValue::getAggregate(LV.getAddress());
774  break;
775  }
777  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
778  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
779  /*IsInitializer=*/false);
780  }
781 }
782 
783 /// \brief Emit initialization of arrays of complex types.
784 /// \param DestAddr Address of the array.
785 /// \param Type Type of array.
786 /// \param Init Initial expression of array.
787 /// \param SrcAddr Address of the original array.
788 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
789  QualType Type, bool EmitDeclareReductionInit,
790  const Expr *Init,
791  const OMPDeclareReductionDecl *DRD,
792  Address SrcAddr = Address::invalid()) {
793  // Perform element-by-element initialization.
794  QualType ElementTy;
795 
796  // Drill down to the base element type on both arrays.
797  auto ArrayTy = Type->getAsArrayTypeUnsafe();
798  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
799  DestAddr =
800  CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
801  if (DRD)
802  SrcAddr =
803  CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
804 
805  llvm::Value *SrcBegin = nullptr;
806  if (DRD)
807  SrcBegin = SrcAddr.getPointer();
808  auto DestBegin = DestAddr.getPointer();
809  // Cast from pointer to array type to pointer to single element.
810  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
811  // The basic structure here is a while-do loop.
812  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
813  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
814  auto IsEmpty =
815  CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
816  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
817 
818  // Enter the loop body, making that address the current address.
819  auto EntryBB = CGF.Builder.GetInsertBlock();
820  CGF.EmitBlock(BodyBB);
821 
822  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
823 
824  llvm::PHINode *SrcElementPHI = nullptr;
825  Address SrcElementCurrent = Address::invalid();
826  if (DRD) {
827  SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
828  "omp.arraycpy.srcElementPast");
829  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
830  SrcElementCurrent =
831  Address(SrcElementPHI,
832  SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
833  }
834  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
835  DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
836  DestElementPHI->addIncoming(DestBegin, EntryBB);
837  Address DestElementCurrent =
838  Address(DestElementPHI,
839  DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
840 
841  // Emit copy.
842  {
843  CodeGenFunction::RunCleanupsScope InitScope(CGF);
844  if (EmitDeclareReductionInit) {
845  emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
846  SrcElementCurrent, ElementTy);
847  } else
848  CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
849  /*IsInitializer=*/false);
850  }
851 
852  if (DRD) {
853  // Shift the address forward by one element.
854  auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
855  SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
856  SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
857  }
858 
859  // Shift the address forward by one element.
860  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
861  DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
862  // Check whether we've reached the end.
863  auto Done =
864  CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
865  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
866  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
867 
868  // Done.
869  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
870 }
871 
872 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
873  return CGF.EmitOMPSharedLValue(E);
874 }
875 
876 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
877  const Expr *E) {
878  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
879  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
880  return LValue();
881 }
882 
883 void ReductionCodeGen::emitAggregateInitialization(
884  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
885  const OMPDeclareReductionDecl *DRD) {
886  // Emit VarDecl with copy init for arrays.
887  // Get the address of the original variable captured in current
888  // captured region.
889  auto *PrivateVD =
890  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
891  bool EmitDeclareReductionInit =
892  DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
893  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
894  EmitDeclareReductionInit,
895  EmitDeclareReductionInit ? ClausesData[N].ReductionOp
896  : PrivateVD->getInit(),
897  DRD, SharedLVal.getAddress());
898 }
899 
// NOTE(review): the leading lines of this definition (the ReductionCodeGen
// constructor signature taking the Shareds/Privates lists) were lost in
// extraction; only the trailing parameter and the body are visible here.
// Body: records one (shared ref, private, reduction op) triple per clause
// item and pre-reserves the side tables sized to Shareds.
902  ArrayRef<const Expr *> ReductionOps) {
903  ClausesData.reserve(Shareds.size());
904  SharedAddresses.reserve(Shareds.size());
905  Sizes.reserve(Shareds.size());
906  BaseDecls.reserve(Shareds.size());
// Walk the three sequences in lockstep; Privates and ReductionOps are
// expected to be parallel to Shareds.
907  auto IPriv = Privates.begin();
908  auto IRed = ReductionOps.begin();
909  for (const auto *Ref : Shareds) {
910  ClausesData.emplace_back(Ref, *IPriv, *IRed);
911  std::advance(IPriv, 1);
912  std::advance(IRed, 1);
913  }
914 }
915 
916 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
917  assert(SharedAddresses.size() == N &&
918  "Number of generated lvalues must be exactly N.");
919  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
920  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
921  SharedAddresses.emplace_back(First, Second);
922 }
923 
// NOTE(review): the signature line of this definition (presumably
// ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N))
// was lost in extraction. Body: computes the size (bytes, and element
// count for variably modified types) of reduction item N and records it
// in Sizes; for VLAs it also binds the size expression and emits the
// variably modified type.
925  auto *PrivateVD =
926  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
927  QualType PrivateType = PrivateVD->getType();
928  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
// Fixed-size items: a constant byte size is enough; no element count.
929  if (!PrivateType->isVariablyModifiedType()) {
930  Sizes.emplace_back(
931  CGF.getTypeSize(
932  SharedAddresses[N].first.getType().getNonReferenceType()),
933  nullptr);
934  return;
935  }
936  llvm::Value *Size;
937  llvm::Value *SizeInChars;
938  llvm::Type *ElemType =
939  cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
940  ->getElementType();
941  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
// Array section: element count = (UB - LB) + 1, byte size = count * sizeof.
942  if (AsArraySection) {
943  Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
944  SharedAddresses[N].first.getPointer());
945  Size = CGF.Builder.CreateNUWAdd(
946  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
947  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
948  } else {
// Whole VLA: byte size from the type, element count by exact division.
949  SizeInChars = CGF.getTypeSize(
950  SharedAddresses[N].first.getType().getNonReferenceType());
951  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
952  }
953  Sizes.emplace_back(SizeInChars, Size);
// NOTE(review): a line is missing here (original line 954) — by the
// argument shape it constructed a CodeGenFunction::OpaqueValueMapping
// binding the VLA size expression to Size. Confirm against upstream.
955  CGF,
956  cast<OpaqueValueExpr>(
957  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
958  RValue::get(Size));
959  CGF.EmitVariablyModifiedType(PrivateType);
960 }
961 
// NOTE(review): the first signature line of this overload (taking an
// explicit Size value in addition to N) was lost in extraction.
// Body: re-binds the VLA size expression of item N to the supplied Size
// and re-emits the variably modified type; no-op (with a sanity assert)
// for fixed-size items.
963  llvm::Value *Size) {
964  auto *PrivateVD =
965  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
966  QualType PrivateType = PrivateVD->getType();
967  if (!PrivateType->isVariablyModifiedType()) {
968  assert(!Size && !Sizes[N].second &&
969  "Size should be nullptr for non-variably modified reduction "
970  "items.");
971  return;
972  }
// NOTE(review): missing line (original 973) — presumably the
// CodeGenFunction::OpaqueValueMapping constructor call; confirm upstream.
974  CGF,
975  cast<OpaqueValueExpr>(
976  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
977  RValue::get(Size));
978  CGF.EmitVariablyModifiedType(PrivateType);
979 }
980 
// NOTE(review): the function-name line (presumably
// ReductionCodeGen::emitInitialization) was lost in extraction.
// Body: initializes the private copy of reduction item N, trying in order:
// aggregate init for array types, the declare-reduction initializer, and
// finally the private VarDecl's own (non-trivial) initializer if the
// caller-provided DefaultInit callback declined to handle it.
982  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
983  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
984  assert(SharedAddresses.size() > N && "No variable was generated");
985  auto *PrivateVD =
986  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
987  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
988  QualType PrivateType = PrivateVD->getType();
// Retype both addresses to their memory representation before use.
989  PrivateAddr = CGF.Builder.CreateElementBitCast(
990  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
991  QualType SharedType = SharedAddresses[N].first.getType();
992  SharedLVal = CGF.MakeAddrLValue(
993  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
994  CGF.ConvertTypeForMem(SharedType)),
995  SharedType, SharedAddresses[N].first.getBaseInfo(),
996  CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
997  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
998  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
999  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1000  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1001  PrivateAddr, SharedLVal.getAddress(),
1002  SharedLVal.getType());
1003  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1004  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1005  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1006  PrivateVD->getType().getQualifiers(),
1007  /*IsInitializer=*/false);
1008  }
1009 }
1010 
// NOTE(review): signature line lost in extraction (presumably
// bool ReductionCodeGen::needCleanups(unsigned N)).
// Returns true when the private copy of item N has a non-trivial
// destruction kind and therefore requires a cleanup.
1012  auto *PrivateVD =
1013  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl())
1014  QualType PrivateType = PrivateVD->getType();
1015  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1016  return DTorKind != QualType::DK_none;
1017 }
1018 
// NOTE(review): the first signature line was lost in extraction
// (presumably ReductionCodeGen::emitCleanups(CodeGenFunction &CGF,
// unsigned N, ...)). Body: if item N needs destruction, retypes the
// private address and pushes a destroy cleanup for it.
1020  Address PrivateAddr) {
1021  auto *PrivateVD =
1022  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1023  QualType PrivateType = PrivateVD->getType();
1024  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1025  if (needCleanups(N)) {
1026  PrivateAddr = CGF.Builder.CreateElementBitCast(
1027  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1028  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1029  }
1030 }
1031 
// NOTE(review): signature line lost in extraction (a static helper —
// by the call at adjustPrivateAddress it is
// loadToBegin(CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)).
// Body: repeatedly loads through pointer/reference layers of BaseTy until
// it reaches ElTy, then returns an lvalue retyped to ElTy's memory type.
1033  LValue BaseLV) {
1034  BaseTy = BaseTy.getNonReferenceType();
1035  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1036  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1037  if (auto *PtrTy = BaseTy->getAs<PointerType>())
1038  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1039  else {
1040  LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1041  BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1042  }
1043  BaseTy = BaseTy->getPointeeType();
1044  }
1045  return CGF.MakeAddrLValue(
// NOTE(review): missing line here (original 1046) — by the dangling
// ConvertTypeForMem argument it was an element bit-cast of
// BaseLV.getAddress(); confirm against upstream.
1047  CGF.ConvertTypeForMem(ElTy)),
1048  BaseLV.getType(), BaseLV.getBaseInfo(),
1049  CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1050 }
1051 
// NOTE(review): first signature line lost in extraction (a static helper;
// the call site passes (CGF, BaseTy, ElTy, BaseLVType, BaseLVAlignment,
// Addr)). Body: builds a chain of temporaries mirroring the
// pointer/reference layers of BaseTy, stores the (cast) Addr into the
// innermost one, and returns the outermost temporary — or, with no
// indirection, just the cast address at the given alignment.
1053  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1054  llvm::Value *Addr) {
1055  Address Tmp = Address::invalid();
1056  Address TopTmp = Address::invalid();
1057  Address MostTopTmp = Address::invalid();
1058  BaseTy = BaseTy.getNonReferenceType();
1059  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1060  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
// Each indirection level gets its own temporary; link each new one into
// the previous level by storing its pointer.
1061  Tmp = CGF.CreateMemTemp(BaseTy);
1062  if (TopTmp.isValid())
1063  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1064  else
1065  MostTopTmp = Tmp;
1066  TopTmp = Tmp;
1067  BaseTy = BaseTy->getPointeeType();
1068  }
1069  llvm::Type *Ty = BaseLVType;
1070  if (Tmp.isValid())
1071  Ty = Tmp.getElementType();
1072  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1073  if (Tmp.isValid()) {
1074  CGF.Builder.CreateStore(Addr, Tmp);
1075  return MostTopTmp;
1076  }
1077  return Address(Addr, BaseLVAlignment);
1078 }
1079 
// NOTE(review): the signature line was lost in extraction (by the body it
// is an Address-returning ReductionCodeGen member taking (CGF, N,
// PrivateAddr) — presumably adjustPrivateAddress; confirm upstream).
// Body: for array-section / array-subscript reduction items, rebases the
// private address so it corresponds to the original base variable, by
// applying the shared-vs-base pointer offset to the private pointer.
1081  Address PrivateAddr) {
1082  const DeclRefExpr *DE;
1083  const VarDecl *OrigVD = nullptr;
// Peel nested sections/subscripts down to the underlying DeclRefExpr.
1084  if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
1085  auto *Base = OASE->getBase()->IgnoreParenImpCasts();
1086  while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1087  Base = TempOASE->getBase()->IgnoreParenImpCasts();
1088  while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1089  Base = TempASE->getBase()->IgnoreParenImpCasts();
1090  DE = cast<DeclRefExpr>(Base);
1091  OrigVD = cast<VarDecl>(DE->getDecl());
1092  } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
1093  auto *Base = ASE->getBase()->IgnoreParenImpCasts();
1094  while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1095  Base = TempASE->getBase()->IgnoreParenImpCasts();
1096  DE = cast<DeclRefExpr>(Base);
1097  OrigVD = cast<VarDecl>(DE->getDecl());
1098  }
1099  if (OrigVD) {
1100  BaseDecls.emplace_back(OrigVD);
1101  auto OriginalBaseLValue = CGF.EmitLValue(DE);
1102  LValue BaseLValue =
1103  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1104  OriginalBaseLValue);
// Offset of the shared item from the base of the original variable.
1105  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1106  BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1107  llvm::Value *PrivatePointer =
// NOTE(review): missing line here (original 1108) — by the argument shape
// it was CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(; confirm.
1109  PrivateAddr.getPointer(),
1110  SharedAddresses[N].first.getAddress().getType());
1111  llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1112  return castToBase(CGF, OrigVD->getType(),
1113  SharedAddresses[N].first.getType(),
1114  OriginalBaseLValue.getAddress().getType(),
1115  OriginalBaseLValue.getAlignment(), Ptr);
1116  }
// Plain variable: no rebasing needed; record it and return as-is.
1117  BaseDecls.emplace_back(
1118  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1119  return PrivateAddr;
1120 }
1121 
// NOTE(review): signature line lost in extraction (presumably
// bool ReductionCodeGen::usesReductionInitializer(unsigned N) const).
// True iff item N's reduction op names a declare-reduction decl that
// provides its own initializer.
1123  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
1124  return DRD && DRD->getInitializer();
1125 }
1126 
1127 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1128  return CGF.EmitLoadOfPointerLValue(
1129  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1130  getThreadIDVariable()->getType()->castAs<PointerType>());
1131 }
1132 
1133 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1134  if (!CGF.HaveInsertPoint())
1135  return;
1136  // 1.2.2 OpenMP Language Terminology
1137  // Structured block - An executable statement with a single entry at the
1138  // top and a single exit at the bottom.
1139  // The point of exit cannot be a branch out of the structured block.
1140  // longjmp() and throw() must not violate the entry/exit criteria.
1141  CGF.EHStack.pushTerminate();
1142  CodeGen(CGF);
1143  CGF.EHStack.popTerminate();
1144 }
1145 
// Return an lvalue for the task's thread-id variable, which (unlike the
// parallel case) is stored directly as a local, not behind a pointer.
1146 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1147  CodeGenFunction &CGF) {
1148  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1149  getThreadIDVariable()->getType(),
// NOTE(review): missing line here (original 1150) — presumably the
// trailing AlignmentSource/LValueBaseInfo argument; confirm upstream.
1151 }
1152 
// NOTE(review): the constructor's signature line (presumably
// CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)) was lost in
// extraction. Body: creates the ident_t struct type used for source
// locations passed to the OpenMP runtime, and the kmp_critical_name type
// (an [8 x i32] lock array).
1154  : CGM(CGM), OffloadEntriesInfoManager(CGM) {
1155  IdentTy = llvm::StructType::create(
1156  "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
1157  CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
1158  CGM.Int8PtrTy /* psource */);
1159  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1160 
// NOTE(review): missing line here (original 1161) — a call made at the end
// of construction (identifier stripped by extraction); confirm upstream.
1162 }
1163 
// Drop the cache of internal runtime variables accumulated during codegen.
1164 void CGOpenMPRuntime::clear() {
1165  InternalVars.clear();
1166 }
1167 
// Emit the outlined ".omp_combiner." or ".omp_initializer." helper for a
// user-defined reduction: void fn(Ty *out, Ty *in), with the UDR's
// omp_in/omp_out (or omp_orig/omp_priv) variables mapped onto the two
// pointer parameters.
// NOTE(review): the parameter-list line of the signature (original 1169)
// was lost in extraction.
1168 static llvm::Function *
1170  const Expr *CombinerInitializer, const VarDecl *In,
1171  const VarDecl *Out, bool IsCombiner) {
1172  // void .omp_combiner.(Ty *in, Ty *out);
1173  auto &C = CGM.getContext();
1174  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1175  FunctionArgList Args;
1176  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1177  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1178  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1179  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1180  Args.push_back(&OmpOutParm);
1181  Args.push_back(&OmpInParm);
1182  auto &FnInfo =
1183  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1184  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1185  auto *Fn = llvm::Function::Create(
// NOTE(review): missing line here (original 1186) — presumably the
// function type and linkage arguments to llvm::Function::Create.
1187  IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
1188  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
// Force inlining of the helper into its (runtime-invoked) callers.
1189  Fn->removeFnAttr(llvm::Attribute::NoInline);
1190  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1191  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1192  CodeGenFunction CGF(CGM);
1193  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1194  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1195  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
// NOTE(review): missing line (original 1196) — by the uses of `Scope`
// below it declared a CodeGenFunction::OMPPrivateScope Scope(CGF).
1197  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1198  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
1199  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1200  .getAddress();
1201  });
1202  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1203  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
1204  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1205  .getAddress();
1206  });
1207  (void)Scope.Privatize();
// For initializers, emit omp_priv's own non-trivial default init first.
1208  if (!IsCombiner && Out->hasInit() &&
1209  !CGF.isTrivialInitializer(Out->getInit())) {
1210  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1211  Out->getType().getQualifiers(),
1212  /*IsInitializer=*/true);
1213  }
1214  if (CombinerInitializer)
1215  CGF.EmitIgnoredExpr(CombinerInitializer);
1216  Scope.ForceCleanup();
1217  CGF.FinishFunction();
1218  return Fn;
1219 }
1220 
// NOTE(review): the function-name line (presumably
// CGOpenMPRuntime::emitUserDefinedReduction) was lost in extraction.
// Body: emits (once per decl, cached in UDRMap) the combiner and optional
// initializer helpers for a #pragma omp declare reduction, and ties them
// to the current function for later cleanup via FunctionUDRMap.
1222  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1223  if (UDRMap.count(D) > 0)
1224  return;
1225  auto &C = CGM.getContext();
// Lazily intern the fixed identifiers used to look up the UDR variables.
1226  if (!In || !Out) {
1227  In = &C.Idents.get("omp_in");
1228  Out = &C.Idents.get("omp_out");
1229  }
1230  llvm::Function *Combiner = emitCombinerOrInitializer(
1231  CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
1232  cast<VarDecl>(D->lookup(Out).front()),
1233  /*IsCombiner=*/true);
1234  llvm::Function *Initializer = nullptr;
1235  if (auto *Init = D->getInitializer()) {
1236  if (!Priv || !Orig) {
1237  Priv = &C.Idents.get("omp_priv");
1238  Orig = &C.Idents.get("omp_orig");
1239  }
1240  Initializer = emitCombinerOrInitializer(
1241  CGM, D->getType(),
// NOTE(review): missing line here (original 1242) — the condition
// selecting Init vs nullptr (identifier stripped); confirm upstream.
1243  : nullptr,
1244  cast<VarDecl>(D->lookup(Orig).front()),
1245  cast<VarDecl>(D->lookup(Priv).front()),
1246  /*IsCombiner=*/false);
1247  }
1248  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
1249  if (CGF) {
1250  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1251  Decls.second.push_back(D);
1252  }
1253 }
1254 
// Return the cached (combiner, initializer) pair for a declare-reduction
// decl, emitting it on demand if not yet present in UDRMap.
// NOTE(review): the line carrying the function name and parameter
// (original 1256) was lost in extraction.
1255 std::pair<llvm::Function *, llvm::Function *>
1257  auto I = UDRMap.find(D);
1258  if (I != UDRMap.end())
1259  return I->second;
1260  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1261  return UDRMap.lookup(D);
1262 }
1263 
1264 // Layout information for ident_t.
// NOTE(review): this region held four small static helpers whose signature
// lines were all lost in extraction (by their bodies: getIdentAlign,
// getIdentSize, getOffsetOfIdentField, createIdentFieldGEP).
// Alignment of an ident_t object: pointer alignment (last field is i8*).
1266  return CGM.getPointerAlign();
1267 }
// Size of ident_t: four i32 fields (16 bytes) plus one pointer; the assert
// checks the pointer lands on a properly aligned offset.
1269  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
1270  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
1271 }
// Byte offset of a field within ident_t, by index.
1273  // All the fields except the last are i32, so this works beautifully.
1274  return unsigned(Field) * CharUnits::fromQuantity(4);
1275 }
// GEP to a named ident_t field at the computed constant offset.
1277  IdentFieldIndex Field,
1278  const llvm::Twine &Name = "") {
1279  auto Offset = getOffsetOfIdentField(Field);
1280  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
1281 }
1282 
// NOTE(review): the static function-name line (original 1283, presumably
// emitParallelOrTeamsOutlinedFunction) was lost in extraction.
// Body: outlines the captured statement of a parallel/teams directive into
// a helper function, propagating whether any enclosed construct carries a
// 'cancel' clause.
1284  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1285  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1286  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1287  assert(ThreadIDVar->getType()->isPointerType() &&
1288  "thread id variable must be of type kmp_int32 *");
1289  CodeGenFunction CGF(CGM, true);
// Detect a 'cancel' clause on any of the parallel-like directive kinds.
1290  bool HasCancel = false;
1291  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1292  HasCancel = OPD->hasCancel();
1293  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1294  HasCancel = OPSD->hasCancel();
1295  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1296  HasCancel = OPFD->hasCancel();
1297  else if (auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1298  HasCancel = OPFD->hasCancel();
1299  else if (auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1300  HasCancel = OPFD->hasCancel();
1301  else if (auto *OPFD = dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1302  HasCancel = OPFD->hasCancel();
1303  else if (auto *OPFD =
1304  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1305  HasCancel = OPFD->hasCancel();
1306  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1307  HasCancel, OutlinedHelperName);
1308  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1309  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1310 }
1311 
// NOTE(review): the function-name line (original 1312, presumably
// CGOpenMPRuntime::emitParallelOutlinedFunction) and the call line
// (original 1316, the return of emitParallelOrTeamsOutlinedFunction) were
// lost in extraction. Body: outlines the OMPD_parallel captured statement.
1313  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1314  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1315  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1317  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1318 }
1319 
// NOTE(review): as with the parallel variant above, the function-name line
// (original 1320, presumably CGOpenMPRuntime::emitTeamsOutlinedFunction)
// and the call line (original 1324) were lost in extraction.
// Body: outlines the OMPD_teams captured statement.
1321  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1322  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1323  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1325  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1326 }
1327 
// NOTE(review): the function-name line (original 1328, presumably
// CGOpenMPRuntime::emitTaskOutlinedFunction) was lost in extraction.
// Body: outlines a task region. For untied tasks, installs an action that
// re-enqueues the task via __kmpc_omp_task at each scheduling point and
// reports the resulting number of task parts back through NumberOfParts.
1329  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1330  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1331  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1332  bool Tied, unsigned &NumberOfParts) {
// Codegen callback run at untied-task switch points: calls
// __kmpc_omp_task(loc, tid, task_t*) to reschedule the task.
1333  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1334  PrePostActionTy &) {
1335  auto *ThreadID = getThreadID(CGF, D.getLocStart());
1336  auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
1337  llvm::Value *TaskArgs[] = {
1338  UpLoc, ThreadID,
1339  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1340  TaskTVar->getType()->castAs<PointerType>())
1341  .getPointer()};
1342  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1343  };
1344  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1345  UntiedCodeGen);
1346  CodeGen.setAction(Action);
1347  assert(!ThreadIDVar->getType()->isPointerType() &&
1348  "thread id variable must be of type kmp_int32 for tasks");
1349  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
1350  auto *TD = dyn_cast<OMPTaskDirective>(&D);
1351  CodeGenFunction CGF(CGM, true);
1352  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1353  InnermostKind,
1354  TD ? TD->hasCancel() : false, Action);
1355  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1356  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
1357  if (!Tied)
1358  NumberOfParts = Action.getNumberOfParts();
1359  return Res;
1360 }
1361 
1362 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1363  CharUnits Align = getIdentAlign(CGM);
1364  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
1365  if (!Entry) {
1366  if (!DefaultOpenMPPSource) {
1367  // Initialize default location for psource field of ident_t structure of
1368  // all ident_t objects. Format is ";file;function;line;column;;".
1369  // Taken from
1370  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
1371  DefaultOpenMPPSource =
1372  CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1373  DefaultOpenMPPSource =
1374  llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1375  }
1376 
1377  ConstantInitBuilder builder(CGM);
1378  auto fields = builder.beginStruct(IdentTy);
1379  fields.addInt(CGM.Int32Ty, 0);
1380  fields.addInt(CGM.Int32Ty, Flags);
1381  fields.addInt(CGM.Int32Ty, 0);
1382  fields.addInt(CGM.Int32Ty, 0);
1383  fields.add(DefaultOpenMPPSource);
1384  auto DefaultOpenMPLocation =
1385  fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
1386  llvm::GlobalValue::PrivateLinkage);
1387  DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
1388 
1389  OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
1390  }
1391  return Address(Entry, Align);
1392 }
1393 
// NOTE(review): the function-name line (original 1394, presumably
// CGOpenMPRuntime::emitUpdateLocation) was lost in extraction.
// Body: materializes an ident_t* describing Loc for passing to runtime
// calls. Without debug info (or with an invalid Loc) the shared default
// location is returned; otherwise a per-function ident_t alloca is filled
// from the default and its psource field is pointed at a cached
// ";file;function;line;column;;" string.
1395  SourceLocation Loc,
1396  unsigned Flags) {
1397  Flags |= OMP_IDENT_KMPC;
1398  // If no debug info is generated - return global default location.
1399  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1400  Loc.isInvalid())
1401  return getOrCreateDefaultLocation(Flags).getPointer();
1402 
1403  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1404 
1405  Address LocValue = Address::invalid();
1406  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1407  if (I != OpenMPLocThreadIDMap.end())
1408  LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
1409 
1410  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1411  // GetOpenMPThreadID was called before this routine.
1412  if (!LocValue.isValid()) {
1413  // Generate "ident_t .kmpc_loc.addr;"
1414  Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
1415  ".kmpc_loc.addr");
1416  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1417  Elem.second.DebugLoc = AI.getPointer();
1418  LocValue = AI;
1419 
// Seed the alloca from the default location, in the entry block so it
// dominates all uses within the function.
1420  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1421  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1422  CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1423  CGM.getSize(getIdentSize(CGF.CGM)));
1424  }
1425 
1426  // char **psource = &.kmpc_loc_<flags>.addr.psource;
1427  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
1428 
// One location string per raw SourceLocation, cached in OpenMPDebugLocMap.
1429  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1430  if (OMPDebugLoc == nullptr) {
1431  SmallString<128> Buffer2;
1432  llvm::raw_svector_ostream OS2(Buffer2);
1433  // Build debug location
// NOTE(review): missing line here (original 1434) — by the uses of `PLoc`
// it fetched the PresumedLoc for Loc from the SourceManager; confirm.
1435  OS2 << ";" << PLoc.getFilename() << ";";
1436  if (const FunctionDecl *FD =
1437  dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
1438  OS2 << FD->getQualifiedNameAsString();
1439  }
1440  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1441  OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1442  OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1443  }
1444  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1445  CGF.Builder.CreateStore(OMPDebugLoc, PSource);
1446 
1447  // Our callers always pass this to a runtime function, so for
1448  // convenience, go ahead and return a naked pointer.
1449  return LocValue.getPointer();
1450 }
1451 
// NOTE(review): the function-name line (original 1452, presumably
// CGOpenMPRuntime::getThreadID) was lost in extraction.
// Body: returns the OpenMP global thread id for the current function,
// cached per function in OpenMPLocThreadIDMap. Inside an outlined region
// the id is loaded from the region's thread-id variable; otherwise it is
// obtained by calling __kmpc_global_thread_num in the entry block.
1453  SourceLocation Loc) {
1454  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1455 
1456  llvm::Value *ThreadID = nullptr;
1457  // Check whether we've already cached a load of the thread id in this
1458  // function.
1459  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1460  if (I != OpenMPLocThreadIDMap.end()) {
1461  ThreadID = I->second.ThreadID;
1462  if (ThreadID != nullptr)
1463  return ThreadID;
1464  }
1465  // If exceptions are enabled, do not use parameter to avoid possible crash.
1466  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1467  !CGF.getLangOpts().CXXExceptions ||
1468  CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1469  if (auto *OMPRegionInfo =
1470  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1471  if (OMPRegionInfo->getThreadIDVariable()) {
1472  // Check if this an outlined function with thread id passed as argument.
1473  auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1474  ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
1475  // If value loaded in entry block, cache it and use it everywhere in
1476  // function.
1477  if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1478  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1479  Elem.second.ThreadID = ThreadID;
1480  }
1481  return ThreadID;
1482  }
1483  }
1484  }
1485 
1486  // This is not an outlined function region - need to call __kmpc_int32
1487  // kmpc_global_thread_num(ident_t *loc).
1488  // Generate thread id value and cache this value for use across the
1489  // function.
1490  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1491  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1492  auto *Call = CGF.Builder.CreateCall(
// NOTE(review): missing line here (original 1493) — by the trailing
// argument it was createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
// the callee of this call; confirm upstream.
1494  emitUpdateLocation(CGF, Loc));
1495  Call->setCallingConv(CGF.getRuntimeCC());
1496  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1497  Elem.second.ThreadID = Call;
1498  return Call;
1499 }
1500 
// NOTE(review): the signature line (original 1501, presumably
// CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF)) was lost in
// extraction. Body: drops per-function caches — the thread-id/location
// entry and any user-defined reductions emitted for this function.
1502  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1503  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1504  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1505  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1506  for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1507  UDRMap.erase(D);
1508  }
1509  FunctionUDRMap.erase(CGF.CurFn);
1510  }
1511 }
1512 
// NOTE(review): the signature line (original 1513, presumably
// llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy()) was lost in
// extraction. Returns ident_t*; the empty if-body matches the upstream
// source at this revision (IdentTy is created in the constructor).
1514  if (!IdentTy) {
1515  }
1516  return llvm::PointerType::getUnqual(IdentTy);
1517 }
1518 
// NOTE(review): the signature line (original 1519, presumably
// llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy()) was lost in
// extraction. Lazily builds and returns a pointer to the kmpc_micro
// outlined-function type: void(kmp_int32*, kmp_int32*, ...).
1520  if (!Kmpc_MicroTy) {
1521  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1522  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1523  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1524  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1525  }
1526  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1527 }
1528 
1529 llvm::Constant *
1531  llvm::Constant *RTLFn = nullptr;
1532  switch (static_cast<OpenMPRTLFunction>(Function)) {
1533  case OMPRTL__kmpc_fork_call: {
1534  // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1535  // microtask, ...);
1536  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1538  llvm::FunctionType *FnTy =
1539  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1540  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1541  break;
1542  }
1544  // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1545  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1546  llvm::FunctionType *FnTy =
1547  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1548  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1549  break;
1550  }
1552  // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1553  // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1554  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1556  CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1557  llvm::FunctionType *FnTy =
1558  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1559  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1560  break;
1561  }
1562  case OMPRTL__kmpc_critical: {
1563  // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1564  // kmp_critical_name *crit);
1565  llvm::Type *TypeParams[] = {
1567  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1568  llvm::FunctionType *FnTy =
1569  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1570  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1571  break;
1572  }
1574  // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1575  // kmp_critical_name *crit, uintptr_t hint);
1576  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1577  llvm::PointerType::getUnqual(KmpCriticalNameTy),
1578  CGM.IntPtrTy};
1579  llvm::FunctionType *FnTy =
1580  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1581  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1582  break;
1583  }
1585  // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1586  // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1587  // typedef void *(*kmpc_ctor)(void *);
1588  auto KmpcCtorTy =
1589  llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1590  /*isVarArg*/ false)->getPointerTo();
1591  // typedef void *(*kmpc_cctor)(void *, void *);
1592  llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1593  auto KmpcCopyCtorTy =
1594  llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1595  /*isVarArg*/ false)->getPointerTo();
1596  // typedef void (*kmpc_dtor)(void *);
1597  auto KmpcDtorTy =
1598  llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1599  ->getPointerTo();
1600  llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1601  KmpcCopyCtorTy, KmpcDtorTy};
1602  auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1603  /*isVarArg*/ false);
1604  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1605  break;
1606  }
1608  // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1609  // kmp_critical_name *crit);
1610  llvm::Type *TypeParams[] = {
1612  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1613  llvm::FunctionType *FnTy =
1614  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1615  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1616  break;
1617  }
1619  // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1620  // global_tid);
1621  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1622  llvm::FunctionType *FnTy =
1623  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1624  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1625  break;
1626  }
1627  case OMPRTL__kmpc_barrier: {
1628  // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1629  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1630  llvm::FunctionType *FnTy =
1631  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1632  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1633  break;
1634  }
1636  // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1637  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1638  llvm::FunctionType *FnTy =
1639  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1640  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1641  break;
1642  }
1644  // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1645  // kmp_int32 num_threads)
1646  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1647  CGM.Int32Ty};
1648  llvm::FunctionType *FnTy =
1649  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1650  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1651  break;
1652  }
1654  // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1655  // global_tid);
1656  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1657  llvm::FunctionType *FnTy =
1658  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1659  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1660  break;
1661  }
1663  // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1664  // global_tid);
1665  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1666  llvm::FunctionType *FnTy =
1667  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1668  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1669  break;
1670  }
1671  case OMPRTL__kmpc_flush: {
1672  // Build void __kmpc_flush(ident_t *loc);
1673  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1674  llvm::FunctionType *FnTy =
1675  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1676  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1677  break;
1678  }
1679  case OMPRTL__kmpc_master: {
1680  // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1681  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1682  llvm::FunctionType *FnTy =
1683  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1684  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1685  break;
1686  }
1687  case OMPRTL__kmpc_end_master: {
1688  // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1689  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1690  llvm::FunctionType *FnTy =
1691  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1692  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1693  break;
1694  }
1696  // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1697  // int end_part);
1698  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1699  llvm::FunctionType *FnTy =
1700  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1701  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1702  break;
1703  }
1704  case OMPRTL__kmpc_single: {
1705  // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1706  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1707  llvm::FunctionType *FnTy =
1708  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1709  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1710  break;
1711  }
1712  case OMPRTL__kmpc_end_single: {
1713  // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1714  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1715  llvm::FunctionType *FnTy =
1716  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1717  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1718  break;
1719  }
1721  // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1722  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1723  // kmp_routine_entry_t *task_entry);
1724  assert(KmpRoutineEntryPtrTy != nullptr &&
1725  "Type kmp_routine_entry_t must be created.");
1726  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1727  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1728  // Return void * and then cast to particular kmp_task_t type.
1729  llvm::FunctionType *FnTy =
1730  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1731  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1732  break;
1733  }
1734  case OMPRTL__kmpc_omp_task: {
1735  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1736  // *new_task);
1737  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1738  CGM.VoidPtrTy};
1739  llvm::FunctionType *FnTy =
1740  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1741  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1742  break;
1743  }
1744  case OMPRTL__kmpc_copyprivate: {
1745  // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1746  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1747  // kmp_int32 didit);
1748  llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1749  auto *CpyFnTy =
1750  llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1751  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1752  CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1753  CGM.Int32Ty};
1754  llvm::FunctionType *FnTy =
1755  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1756  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1757  break;
1758  }
1759  case OMPRTL__kmpc_reduce: {
1760  // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1761  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1762  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1763  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1764  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1765  /*isVarArg=*/false);
1766  llvm::Type *TypeParams[] = {
1768  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1769  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1770  llvm::FunctionType *FnTy =
1771  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1772  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1773  break;
1774  }
1776  // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1777  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1778  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1779  // *lck);
1780  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1781  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1782  /*isVarArg=*/false);
1783  llvm::Type *TypeParams[] = {
1785  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1786  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1787  llvm::FunctionType *FnTy =
1788  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1789  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1790  break;
1791  }
1792  case OMPRTL__kmpc_end_reduce: {
1793  // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1794  // kmp_critical_name *lck);
1795  llvm::Type *TypeParams[] = {
1797  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1798  llvm::FunctionType *FnTy =
1799  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1800  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1801  break;
1802  }
1804  // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1805  // kmp_critical_name *lck);
1806  llvm::Type *TypeParams[] = {
1808  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1809  llvm::FunctionType *FnTy =
1810  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1811  RTLFn =
1812  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1813  break;
1814  }
1816  // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1817  // *new_task);
1818  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1819  CGM.VoidPtrTy};
1820  llvm::FunctionType *FnTy =
1821  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1822  RTLFn =
1823  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1824  break;
1825  }
1827  // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1828  // *new_task);
1829  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1830  CGM.VoidPtrTy};
1831  llvm::FunctionType *FnTy =
1832  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1833  RTLFn = CGM.CreateRuntimeFunction(FnTy,
1834  /*Name=*/"__kmpc_omp_task_complete_if0");
1835  break;
1836  }
1837  case OMPRTL__kmpc_ordered: {
1838  // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1839  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1840  llvm::FunctionType *FnTy =
1841  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1842  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1843  break;
1844  }
1845  case OMPRTL__kmpc_end_ordered: {
1846  // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1847  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1848  llvm::FunctionType *FnTy =
1849  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1850  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1851  break;
1852  }
1854  // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1855  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1856  llvm::FunctionType *FnTy =
1857  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1858  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1859  break;
1860  }
1861  case OMPRTL__kmpc_taskgroup: {
1862  // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1863  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1864  llvm::FunctionType *FnTy =
1865  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1866  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1867  break;
1868  }
1870  // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1871  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1872  llvm::FunctionType *FnTy =
1873  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1874  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1875  break;
1876  }
1878  // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1879  // int proc_bind)
1880  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1881  llvm::FunctionType *FnTy =
1882  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1883  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1884  break;
1885  }
1887  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1888  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1889  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1890  llvm::Type *TypeParams[] = {
1893  llvm::FunctionType *FnTy =
1894  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1895  RTLFn =
1896  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1897  break;
1898  }
1900  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1901  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1902  // kmp_depend_info_t *noalias_dep_list);
1903  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1906  llvm::FunctionType *FnTy =
1907  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1908  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1909  break;
1910  }
1912  // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1913  // global_tid, kmp_int32 cncl_kind)
1914  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1915  llvm::FunctionType *FnTy =
1916  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1917  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1918  break;
1919  }
1920  case OMPRTL__kmpc_cancel: {
1921  // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1922  // kmp_int32 cncl_kind)
1923  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1924  llvm::FunctionType *FnTy =
1925  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1926  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1927  break;
1928  }
1930  // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
1931  // kmp_int32 num_teams, kmp_int32 num_threads)
1932  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1933  CGM.Int32Ty};
1934  llvm::FunctionType *FnTy =
1935  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1936  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1937  break;
1938  }
1939  case OMPRTL__kmpc_fork_teams: {
1940  // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1941  // microtask, ...);
1942  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1944  llvm::FunctionType *FnTy =
1945  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1946  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1947  break;
1948  }
1949  case OMPRTL__kmpc_taskloop: {
1950  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1951  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1952  // sched, kmp_uint64 grainsize, void *task_dup);
1953  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1954  CGM.IntTy,
1955  CGM.VoidPtrTy,
1956  CGM.IntTy,
1957  CGM.Int64Ty->getPointerTo(),
1958  CGM.Int64Ty->getPointerTo(),
1959  CGM.Int64Ty,
1960  CGM.IntTy,
1961  CGM.IntTy,
1962  CGM.Int64Ty,
1963  CGM.VoidPtrTy};
1964  llvm::FunctionType *FnTy =
1965  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1966  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1967  break;
1968  }
1970  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
1971  // num_dims, struct kmp_dim *dims);
1972  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1973  CGM.Int32Ty,
1974  CGM.Int32Ty,
1975  CGM.VoidPtrTy};
1976  llvm::FunctionType *FnTy =
1977  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1978  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
1979  break;
1980  }
1982  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
1983  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1984  llvm::FunctionType *FnTy =
1985  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1986  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
1987  break;
1988  }
1990  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
1991  // *vec);
1992  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1993  CGM.Int64Ty->getPointerTo()};
1994  llvm::FunctionType *FnTy =
1995  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1996  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
1997  break;
1998  }
2000  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2001  // *vec);
2002  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2003  CGM.Int64Ty->getPointerTo()};
2004  llvm::FunctionType *FnTy =
2005  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2006  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2007  break;
2008  }
2010  // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2011  // *data);
2012  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2013  llvm::FunctionType *FnTy =
2014  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2015  RTLFn =
2016  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2017  break;
2018  }
2020  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2021  // *d);
2022  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2023  llvm::FunctionType *FnTy =
2024  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2025  RTLFn = CGM.CreateRuntimeFunction(
2026  FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2027  break;
2028  }
2029  case OMPRTL__tgt_target: {
2030  // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2031  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2032  // *arg_types);
2033  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2034  CGM.VoidPtrTy,
2035  CGM.Int32Ty,
2036  CGM.VoidPtrPtrTy,
2037  CGM.VoidPtrPtrTy,
2038  CGM.SizeTy->getPointerTo(),
2039  CGM.Int64Ty->getPointerTo()};
2040  llvm::FunctionType *FnTy =
2041  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2042  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2043  break;
2044  }
2045  case OMPRTL__tgt_target_teams: {
2046  // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2047  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2048  // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2049  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2050  CGM.VoidPtrTy,
2051  CGM.Int32Ty,
2052  CGM.VoidPtrPtrTy,
2053  CGM.VoidPtrPtrTy,
2054  CGM.SizeTy->getPointerTo(),
2055  CGM.Int64Ty->getPointerTo(),
2056  CGM.Int32Ty,
2057  CGM.Int32Ty};
2058  llvm::FunctionType *FnTy =
2059  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2060  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2061  break;
2062  }
2063  case OMPRTL__tgt_register_lib: {
2064  // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2065  QualType ParamTy =
2067  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2068  llvm::FunctionType *FnTy =
2069  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2070  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2071  break;
2072  }
2074  // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2075  QualType ParamTy =
2077  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2078  llvm::FunctionType *FnTy =
2079  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2080  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2081  break;
2082  }
2084  // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2085  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2086  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2087  CGM.Int32Ty,
2088  CGM.VoidPtrPtrTy,
2089  CGM.VoidPtrPtrTy,
2090  CGM.SizeTy->getPointerTo(),
2091  CGM.Int64Ty->getPointerTo()};
2092  llvm::FunctionType *FnTy =
2093  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2094  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2095  break;
2096  }
2098  // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2099  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2100  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2101  CGM.Int32Ty,
2102  CGM.VoidPtrPtrTy,
2103  CGM.VoidPtrPtrTy,
2104  CGM.SizeTy->getPointerTo(),
2105  CGM.Int64Ty->getPointerTo()};
2106  llvm::FunctionType *FnTy =
2107  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2108  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2109  break;
2110  }
2112  // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2113  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2114  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2115  CGM.Int32Ty,
2116  CGM.VoidPtrPtrTy,
2117  CGM.VoidPtrPtrTy,
2118  CGM.SizeTy->getPointerTo(),
2119  CGM.Int64Ty->getPointerTo()};
2120  llvm::FunctionType *FnTy =
2121  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2122  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2123  break;
2124  }
2125  }
2126  assert(RTLFn && "Unable to find OpenMP runtime function");
2127  return RTLFn;
2128 }
2129 
2130 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2131  bool IVSigned) {
2132  assert((IVSize == 32 || IVSize == 64) &&
2133  "IV size is not compatible with the omp runtime");
2134  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2135  : "__kmpc_for_static_init_4u")
2136  : (IVSigned ? "__kmpc_for_static_init_8"
2137  : "__kmpc_for_static_init_8u");
2138  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2139  auto PtrTy = llvm::PointerType::getUnqual(ITy);
2140  llvm::Type *TypeParams[] = {
2141  getIdentTyPointerTy(), // loc
2142  CGM.Int32Ty, // tid
2143  CGM.Int32Ty, // schedtype
2144  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2145  PtrTy, // p_lower
2146  PtrTy, // p_upper
2147  PtrTy, // p_stride
2148  ITy, // incr
2149  ITy // chunk
2150  };
2151  llvm::FunctionType *FnTy =
2152  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2153  return CGM.CreateRuntimeFunction(FnTy, Name);
2154 }
2155 
2156 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2157  bool IVSigned) {
2158  assert((IVSize == 32 || IVSize == 64) &&
2159  "IV size is not compatible with the omp runtime");
2160  auto Name =
2161  IVSize == 32
2162  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2163  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2164  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2165  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2166  CGM.Int32Ty, // tid
2167  CGM.Int32Ty, // schedtype
2168  ITy, // lower
2169  ITy, // upper
2170  ITy, // stride
2171  ITy // chunk
2172  };
2173  llvm::FunctionType *FnTy =
2174  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2175  return CGM.CreateRuntimeFunction(FnTy, Name);
2176 }
2177 
2178 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2179  bool IVSigned) {
2180  assert((IVSize == 32 || IVSize == 64) &&
2181  "IV size is not compatible with the omp runtime");
2182  auto Name =
2183  IVSize == 32
2184  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2185  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2186  llvm::Type *TypeParams[] = {
2187  getIdentTyPointerTy(), // loc
2188  CGM.Int32Ty, // tid
2189  };
2190  llvm::FunctionType *FnTy =
2191  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2192  return CGM.CreateRuntimeFunction(FnTy, Name);
2193 }
2194 
2195 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2196  bool IVSigned) {
2197  assert((IVSize == 32 || IVSize == 64) &&
2198  "IV size is not compatible with the omp runtime");
2199  auto Name =
2200  IVSize == 32
2201  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2202  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2203  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2204  auto PtrTy = llvm::PointerType::getUnqual(ITy);
2205  llvm::Type *TypeParams[] = {
2206  getIdentTyPointerTy(), // loc
2207  CGM.Int32Ty, // tid
2208  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2209  PtrTy, // p_lower
2210  PtrTy, // p_upper
2211  PtrTy // p_stride
2212  };
2213  llvm::FunctionType *FnTy =
2214  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2215  return CGM.CreateRuntimeFunction(FnTy, Name);
2216 }
2217 
2218 llvm::Constant *
2220  assert(!CGM.getLangOpts().OpenMPUseTLS ||
2222  // Lookup the entry, lazily creating it if necessary.
2224  Twine(CGM.getMangledName(VD)) + ".cache.");
2225 }
2226 
2228  const VarDecl *VD,
2229  Address VDAddr,
2230  SourceLocation Loc) {
2231  if (CGM.getLangOpts().OpenMPUseTLS &&
2233  return VDAddr;
2234 
2235  auto VarTy = VDAddr.getElementType();
2236  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2237  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2238  CGM.Int8PtrTy),
2241  return Address(CGF.EmitRuntimeCall(
2243  VDAddr.getAlignment());
2244 }
2245 
2247  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2248  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2249  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2250  // library.
2251  auto OMPLoc = emitUpdateLocation(CGF, Loc);
2253  OMPLoc);
2254  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2255  // to register constructor/destructor for variable.
2256  llvm::Value *Args[] = {OMPLoc,
2257  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2258  CGM.VoidPtrTy),
2259  Ctor, CopyCtor, Dtor};
2260  CGF.EmitRuntimeCall(
2262 }
2263 
2265  const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2266  bool PerformInit, CodeGenFunction *CGF) {
2267  if (CGM.getLangOpts().OpenMPUseTLS &&
2269  return nullptr;
2270 
2271  VD = VD->getDefinition(CGM.getContext());
2272  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
2273  ThreadPrivateWithDefinition.insert(VD);
2274  QualType ASTTy = VD->getType();
2275 
2276  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2277  auto Init = VD->getAnyInitializer();
2278  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2279  // Generate function that re-emits the declaration's initializer into the
2280  // threadprivate copy of the variable VD
2281  CodeGenFunction CtorCGF(CGM);
2282  FunctionArgList Args;
2285  Args.push_back(&Dst);
2286 
2288  CGM.getContext().VoidPtrTy, Args);
2289  auto FTy = CGM.getTypes().GetFunctionType(FI);
2291  FTy, ".__kmpc_global_ctor_.", FI, Loc);
2292  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2293  Args, SourceLocation());
2294  auto ArgVal = CtorCGF.EmitLoadOfScalar(
2295  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2296  CGM.getContext().VoidPtrTy, Dst.getLocation());
2297  Address Arg = Address(ArgVal, VDAddr.getAlignment());
2298  Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
2299  CtorCGF.ConvertTypeForMem(ASTTy));
2300  CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2301  /*IsInitializer=*/true);
2302  ArgVal = CtorCGF.EmitLoadOfScalar(
2303  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2304  CGM.getContext().VoidPtrTy, Dst.getLocation());
2305  CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2306  CtorCGF.FinishFunction();
2307  Ctor = Fn;
2308  }
2309  if (VD->getType().isDestructedType() != QualType::DK_none) {
2310  // Generate function that emits destructor call for the threadprivate copy
2311  // of the variable VD
2312  CodeGenFunction DtorCGF(CGM);
2313  FunctionArgList Args;
2316  Args.push_back(&Dst);
2317 
2319  CGM.getContext().VoidTy, Args);
2320  auto FTy = CGM.getTypes().GetFunctionType(FI);
2322  FTy, ".__kmpc_global_dtor_.", FI, Loc);
2323  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2324  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2325  SourceLocation());
2326  // Create a scope with an artificial location for the body of this function.
2327  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2328  auto ArgVal = DtorCGF.EmitLoadOfScalar(
2329  DtorCGF.GetAddrOfLocalVar(&Dst),
2330  /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2331  DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2332  DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2333  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2334  DtorCGF.FinishFunction();
2335  Dtor = Fn;
2336  }
2337  // Do not emit init function if it is not required.
2338  if (!Ctor && !Dtor)
2339  return nullptr;
2340 
2341  llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2342  auto CopyCtorTy =
2343  llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2344  /*isVarArg=*/false)->getPointerTo();
2345  // Copying constructor for the threadprivate variable.
2346  // Must be NULL - reserved by runtime, but currently it requires that this
2347  // parameter is always NULL. Otherwise it fires assertion.
2348  CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2349  if (Ctor == nullptr) {
2350  auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2351  /*isVarArg=*/false)->getPointerTo();
2352  Ctor = llvm::Constant::getNullValue(CtorTy);
2353  }
2354  if (Dtor == nullptr) {
2355  auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2356  /*isVarArg=*/false)->getPointerTo();
2357  Dtor = llvm::Constant::getNullValue(DtorTy);
2358  }
2359  if (!CGF) {
2360  auto InitFunctionTy =
2361  llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2362  auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2363  InitFunctionTy, ".__omp_threadprivate_init_.",
2365  CodeGenFunction InitCGF(CGM);
2366  FunctionArgList ArgList;
2367  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2368  CGM.getTypes().arrangeNullaryFunction(), ArgList,
2369  Loc);
2370  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2371  InitCGF.FinishFunction();
2372  return InitFunction;
2373  }
2374  emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2375  }
2376  return nullptr;
2377 }
2378 
2380  QualType VarType,
2381  StringRef Name) {
2382  llvm::Twine VarName(Name, ".artificial.");
2383  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2384  llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
2385  llvm::Value *Args[] = {
2387  getThreadID(CGF, SourceLocation()),
2389  CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2390  /*IsSigned=*/false),
2391  getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
2392  return Address(
2394  CGF.EmitRuntimeCall(
2396  VarLVType->getPointerTo(/*AddrSpace=*/0)),
2397  CGM.getPointerAlign());
2398 }
2399 
2400 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
2401 /// function. Here is the logic:
2402 /// if (Cond) {
2403 /// ThenGen();
2404 /// } else {
2405 /// ElseGen();
2406 /// }
2408  const RegionCodeGenTy &ThenGen,
2409  const RegionCodeGenTy &ElseGen) {
2410  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2411 
2412  // If the condition constant folds and can be elided, try to avoid emitting
2413  // the condition and the dead arm of the if/else.
2414  bool CondConstant;
2415  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2416  if (CondConstant)
2417  ThenGen(CGF);
2418  else
2419  ElseGen(CGF);
2420  return;
2421  }
2422 
2423  // Otherwise, the condition did not fold, or we couldn't elide it. Just
2424  // emit the conditional branch.
2425  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
2426  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
2427  auto ContBlock = CGF.createBasicBlock("omp_if.end");
2428  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2429 
2430  // Emit the 'then' code.
2431  CGF.EmitBlock(ThenBlock);
2432  ThenGen(CGF);
2433  CGF.EmitBranch(ContBlock);
2434  // Emit the 'else' code if present.
2435  // There is no need to emit line number for unconditional branch.
2437  CGF.EmitBlock(ElseBlock);
2438  ElseGen(CGF);
2439  // There is no need to emit line number for unconditional branch.
2441  CGF.EmitBranch(ContBlock);
2442  // Emit the continuation block for code after the if.
2443  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2444 }
2445 
// CGOpenMPRuntime::emitParallelCall: code generation for an OpenMP 'parallel'
// construct.  ThenGen forks the region through __kmpc_fork_call; ElseGen (used
// when the 'if' clause condition is false) runs the outlined function serially
// between __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
// NOTE(review): this doxygen listing elides hyperlinked lines (e.g. the
// function signature at 2446 and the RealArgs declaration at 2461), so this
// text is not compilable as-is.
2447  llvm::Value *OutlinedFn,
2448  ArrayRef<llvm::Value *> CapturedVars,
2449  const Expr *IfCond) {
2450  if (!CGF.HaveInsertPoint())
2451  return;
2452  auto *RTLoc = emitUpdateLocation(CGF, Loc);
2453  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2454  PrePostActionTy &) {
2455  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2456  auto &RT = CGF.CGM.getOpenMPRuntime();
2457  llvm::Value *Args[] = {
2458  RTLoc,
2459  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2460  CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2462  RealArgs.append(std::begin(Args), std::end(Args));
2463  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2464 
2465  auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2466  CGF.EmitRuntimeCall(RTLFn, RealArgs);
2467  };
2468  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2469  PrePostActionTy &) {
2470  auto &RT = CGF.CGM.getOpenMPRuntime();
2471  auto ThreadID = RT.getThreadID(CGF, Loc);
2472  // Build calls:
2473  // __kmpc_serialized_parallel(&Loc, GTid);
2474  llvm::Value *Args[] = {RTLoc, ThreadID};
2475  CGF.EmitRuntimeCall(
2476  RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2477 
2478  // OutlinedFn(&GTid, &zero, CapturedStruct);
2479  auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2480  Address ZeroAddr =
2481  CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
2482  /*Name*/ ".zero.addr");
2483  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0))
2484  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2485  OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2486  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2487  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2488  RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2489 
2490  // __kmpc_end_serialized_parallel(&Loc, GTid);
2491  llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2492  CGF.EmitRuntimeCall(
2493  RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2494  EndArgs);
2495  };
2496  // With an 'if' clause both paths are emitted under a runtime branch;
2497  // otherwise only the forking path is generated.
2496  if (IfCond)
2497  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2498  else {
2499  RegionCodeGenTy ThenRCG(ThenGen);
2500  ThenRCG(CGF);
2501  }
2502 }
2503 
2504 // If we're inside an (outlined) parallel region, use the region info's
2505 // thread-ID variable (it is passed in a first argument of the outlined function
2506 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2507 // regular serial code region, get thread ID by calling kmp_int32
2508 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2509 // return the address of that temp.
// NOTE(review): the signature line (2510, presumably
// CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &, ...)) is elided by
// the doxygen rendering.
2511  SourceLocation Loc) {
2512  if (auto *OMPRegionInfo =
2513  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2514  if (OMPRegionInfo->getThreadIDVariable())
2515  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2516 
2517  // Serial region: obtain the global thread id and spill it to a temp so an
2518  // address can be returned.
2517  auto ThreadID = getThreadID(CGF, Loc);
2518  auto Int32Ty =
2519  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2520  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2521  CGF.EmitStoreOfScalar(ThreadID,
2522  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2523 
2524  return ThreadIDTemp;
2525 }
2526 
// CGOpenMPRuntime::getOrCreateInternalVariable: returns the module-level
// global with the given name from the InternalVars cache, creating a
// common-linkage, zero-initialized global of type Ty on first use.  Asserts
// that a cache hit has the requested type.
// NOTE(review): the parameter line 2528 (llvm::Type *Ty, ...) is elided by the
// doxygen rendering.
2527 llvm::Constant *
2529  const llvm::Twine &Name) {
2530  SmallString<256> Buffer;
2531  llvm::raw_svector_ostream Out(Buffer);
2532  Out << Name;
2533  auto RuntimeName = Out.str();
2534  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2535  if (Elem.second) {
2536  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2537  "OMP internal variable has different type than requested");
2538  return &*Elem.second;
2539  }
2540 
2541  return Elem.second = new llvm::GlobalVariable(
2542  CGM.getModule(), Ty, /*IsConstant*/ false,
2543  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2544  Elem.first());
2545 }
2546 
// Returns (creating on first use) the named lock variable used by
// __kmpc_critical for the given critical-region name; signature line 2547 is
// elided by the doxygen rendering.
2548  llvm::Twine Name(".gomp_critical_user_", CriticalName);
2549  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2550 }
2551 
2552 namespace {
2553 /// Common pre(post)-action for different OpenMP constructs.
2554 class CommonActionTy final : public PrePostActionTy {
2555  llvm::Value *EnterCallee;
2556  ArrayRef<llvm::Value *> EnterArgs;
2557  llvm::Value *ExitCallee;
2558  ArrayRef<llvm::Value *> ExitArgs;
2559  bool Conditional;
2560  llvm::BasicBlock *ContBlock = nullptr;
2561 
2562 public:
2563  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2564  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2565  bool Conditional = false)
2566  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2567  ExitArgs(ExitArgs), Conditional(Conditional) {}
2568  void Enter(CodeGenFunction &CGF) override {
2569  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2570  if (Conditional) {
2571  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2572  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2573  ContBlock = CGF.createBasicBlock("omp_if.end");
2574  // Generate the branch (If-stmt)
2575  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2576  CGF.EmitBlock(ThenBlock);
2577  }
2578  }
2579  void Done(CodeGenFunction &CGF) {
2580  // Emit the rest of blocks/branches
2581  CGF.EmitBranch(ContBlock);
2582  CGF.EmitBlock(ContBlock, true);
2583  }
2584  void Exit(CodeGenFunction &CGF) override {
2585  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2586  }
2587 };
2588 } // anonymous namespace
2589 
// CGOpenMPRuntime::emitCriticalRegion: wraps CriticalOpGen between
// __kmpc_critical[_with_hint] and __kmpc_end_critical using the named region
// lock.  NOTE(review): the signature line 2590 and the CommonActionTy
// constructor-argument lines 2609-2611 are elided by the doxygen rendering.
2591  StringRef CriticalName,
2592  const RegionCodeGenTy &CriticalOpGen,
2593  SourceLocation Loc, const Expr *Hint) {
2594  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2595  // CriticalOpGen();
2596  // __kmpc_end_critical(ident_t *, gtid, Lock);
2597  // Prepare arguments and build a call to __kmpc_critical
2598  if (!CGF.HaveInsertPoint())
2599  return;
2600  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2601  getCriticalRegionLock(CriticalName)};
2602  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2603  std::end(Args));
2604  // A 'hint' clause appends one extra argument and selects the
2605  // __kmpc_critical_with_hint entry point.
2604  if (Hint) {
2605  EnterArgs.push_back(CGF.Builder.CreateIntCast(
2606  CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2607  }
2608  CommonActionTy Action(
2612  CriticalOpGen.setAction(Action);
2613  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2614 }
2615 
// CGOpenMPRuntime::emitMasterRegion: guards MasterOpGen behind a conditional
// __kmpc_master / __kmpc_end_master pair — only the team master executes the
// body.  NOTE(review): the signature line 2616 and the __kmpc_end_master
// argument line 2628 are elided by the doxygen rendering.
2617  const RegionCodeGenTy &MasterOpGen,
2618  SourceLocation Loc) {
2619  if (!CGF.HaveInsertPoint())
2620  return;
2621  // if(__kmpc_master(ident_t *, gtid)) {
2622  // MasterOpGen();
2623  // __kmpc_end_master(ident_t *, gtid);
2624  // }
2625  // Prepare arguments and build a call to __kmpc_master
2626  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2627  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2629  /*Conditional=*/true);
2630  MasterOpGen.setAction(Action);
2631  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2632  Action.Done(CGF);
2633 }
2634 
// CGOpenMPRuntime::emitTaskyieldCall: emits __kmpc_omp_taskyield and, inside a
// region that tracks untied tasks, the untied-task switch.  NOTE(review): the
// signature line 2635 and the EmitRuntimeCall line 2643 are elided by the
// doxygen rendering.
2636  SourceLocation Loc) {
2637  if (!CGF.HaveInsertPoint())
2638  return;
2639  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2640  llvm::Value *Args[] = {
2641  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2642  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2644  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2645  Region->emitUntiedSwitch(CGF);
2646 }
2647 
// CGOpenMPRuntime::emitTaskgroupRegion: wraps TaskgroupOpGen between
// __kmpc_taskgroup and __kmpc_end_taskgroup (unconditional).  NOTE(review):
// the signature line 2648 and the __kmpc_end_taskgroup argument line 2659 are
// elided by the doxygen rendering.
2649  const RegionCodeGenTy &TaskgroupOpGen,
2650  SourceLocation Loc) {
2651  if (!CGF.HaveInsertPoint())
2652  return;
2653  // __kmpc_taskgroup(ident_t *, gtid);
2654  // TaskgroupOpGen();
2655  // __kmpc_end_taskgroup(ident_t *, gtid);
2656  // Prepare arguments and build a call to __kmpc_taskgroup
2657  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2658  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2660  Args);
2661  TaskgroupOpGen.setAction(Action);
2662  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2663 }
2664 
2665 /// Given an array of pointers to variables, project the address of a
2666 /// given variable.
// NOTE(review): the static helper's signature line (2667) is elided by the
// doxygen rendering; Index selects the slot, Var supplies alignment/type.
2668  unsigned Index, const VarDecl *Var) {
2669  // Pull out the pointer to the variable.
2670  Address PtrAddr =
2671  CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2672  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2673 
2674  // Re-type the loaded void*-style slot as a pointer to the variable's
2675  // memory type, carrying the variable's declared alignment.
2674  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2675  Addr = CGF.Builder.CreateElementBitCast(
2676  Addr, CGF.ConvertTypeForMem(Var->getType()));
2677  return Addr;
2678 }
2679 
// Static helper: builds the internal ".omp.copyprivate.copy_func" used by
// __kmpc_copyprivate.  It takes two opaque void* arguments (arrays of
// pointers to the LHS/RHS copies) and emits one element-wise assignment per
// copyprivate variable.  NOTE(review): the signature line 2680 and the
// function-type / LHS / RHS declaration lines 2693, 2700, 2703 are elided by
// the doxygen rendering.
2681  CodeGenModule &CGM, llvm::Type *ArgsType,
2682  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2683  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2684  auto &C = CGM.getContext();
2685  // void copy_func(void *LHSArg, void *RHSArg);
2686  FunctionArgList Args;
2687  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
2688  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
2689  Args.push_back(&LHSArg);
2690  Args.push_back(&RHSArg);
2691  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2692  auto *Fn = llvm::Function::Create(
2694  ".omp.copyprivate.copy_func", &CGM.getModule());
2695  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2696  CodeGenFunction CGF(CGM);
2697  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2698  // Dest = (void*[n])(LHSArg);
2699  // Src = (void*[n])(RHSArg);
2701  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2702  ArgsType), CGF.getPointerAlign());
2704  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2705  ArgsType), CGF.getPointerAlign());
2706  // *(Type0*)Dst[0] = *(Type0*)Src[0];
2707  // *(Type1*)Dst[1] = *(Type1*)Src[1];
2708  // ...
2709  // *(Typen*)Dst[n] = *(Typen*)Src[n];
2710  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2711  auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2712  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2713 
2714  auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2715  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2716 
2717  auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2718  QualType Type = VD->getType();
2719  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2720  }
2721  CGF.FinishFunction();
2722  return Fn;
2723 }
2724 
// CGOpenMPRuntime::emitSingleRegion: emits a conditional __kmpc_single /
// __kmpc_end_single region and, when copyprivate variables are present, a
// trailing __kmpc_copyprivate call that broadcasts the single thread's values
// to the rest of the team via the generated copy function.  NOTE(review):
// lines 2725 (signature), 2757, 2780 and 2802 are elided by the doxygen
// rendering.
2726  const RegionCodeGenTy &SingleOpGen,
2727  SourceLocation Loc,
2728  ArrayRef<const Expr *> CopyprivateVars,
2729  ArrayRef<const Expr *> SrcExprs,
2730  ArrayRef<const Expr *> DstExprs,
2731  ArrayRef<const Expr *> AssignmentOps) {
2732  if (!CGF.HaveInsertPoint())
2733  return;
2734  assert(CopyprivateVars.size() == SrcExprs.size() &&
2735  CopyprivateVars.size() == DstExprs.size() &&
2736  CopyprivateVars.size() == AssignmentOps.size());
2737  auto &C = CGM.getContext();
2738  // int32 did_it = 0;
2739  // if(__kmpc_single(ident_t *, gtid)) {
2740  // SingleOpGen();
2741  // __kmpc_end_single(ident_t *, gtid);
2742  // did_it = 1;
2743  // }
2744  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2745  // <copy_func>, did_it);
2746 
2747  Address DidIt = Address::invalid();
2748  if (!CopyprivateVars.empty()) {
2749  // int32 did_it = 0;
2750  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2751  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2752  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2753  }
2754  // Prepare arguments and build a call to __kmpc_single
2755  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2756  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2758  /*Conditional=*/true);
2759  SingleOpGen.setAction(Action);
2760  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2761  if (DidIt.isValid()) {
2762  // did_it = 1;
2763  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2764  }
2765  Action.Done(CGF);
2766  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2767  // <copy_func>, did_it);
2768  if (DidIt.isValid()) {
2769  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2770  auto CopyprivateArrayTy =
2771  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2772  /*IndexTypeQuals=*/0);
2773  // Create a list of all private variables for copyprivate.
2774  Address CopyprivateList =
2775  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2776  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2777  Address Elem = CGF.Builder.CreateConstArrayGEP(
2778  CopyprivateList, I, CGF.getPointerSize());
2779  CGF.Builder.CreateStore(
2781  CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2782  Elem);
2783  }
2784  // Build function that copies private values from single region to all other
2785  // threads in the corresponding parallel region.
2786  auto *CpyFn = emitCopyprivateCopyFunction(
2787  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2788  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2789  auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2790  Address CL =
2791  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2792  CGF.VoidPtrTy);
2793  auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2794  llvm::Value *Args[] = {
2795  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2796  getThreadID(CGF, Loc), // i32 <gtid>
2797  BufSize, // size_t <buf_size>
2798  CL.getPointer(), // void *<copyprivate list>
2799  CpyFn, // void (*) (void *, void *) <copy_func>
2800  DidItVal // i32 did_it
2801  };
2803  }
2804 }
2805 
// CGOpenMPRuntime::emitOrderedRegion: for 'ordered threads' wraps the body in
// __kmpc_ordered / __kmpc_end_ordered; otherwise the body is emitted inline
// with no runtime calls.  NOTE(review): lines 2806 (signature) and 2818 are
// elided by the doxygen rendering.
2807  const RegionCodeGenTy &OrderedOpGen,
2808  SourceLocation Loc, bool IsThreads) {
2809  if (!CGF.HaveInsertPoint())
2810  return;
2811  // __kmpc_ordered(ident_t *, gtid);
2812  // OrderedOpGen();
2813  // __kmpc_end_ordered(ident_t *, gtid);
2814  // Prepare arguments and build a call to __kmpc_ordered
2815  if (IsThreads) {
2816  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2817  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2819  Args);
2820  OrderedOpGen.setAction(Action);
2821  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2822  return;
2823  }
2824  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2825 }
2826 
// CGOpenMPRuntime::emitBarrierCall: emits __kmpc_barrier, or — inside a
// cancellable region — __kmpc_cancel_barrier whose result optionally drives a
// branch to the cancellation exit.  The Flags encode which construct's
// implicit barrier this is.  NOTE(review): lines 2827 (signature), 2853 and
// 2872 are elided by the doxygen rendering.
2828  OpenMPDirectiveKind Kind, bool EmitChecks,
2829  bool ForceSimpleCall) {
2830  if (!CGF.HaveInsertPoint())
2831  return;
2832  // Build call __kmpc_cancel_barrier(loc, thread_id);
2833  // Build call __kmpc_barrier(loc, thread_id);
2834  unsigned Flags;
2835  if (Kind == OMPD_for)
2836  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2837  else if (Kind == OMPD_sections)
2838  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2839  else if (Kind == OMPD_single)
2840  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2841  else if (Kind == OMPD_barrier)
2842  Flags = OMP_IDENT_BARRIER_EXPL;
2843  else
2844  Flags = OMP_IDENT_BARRIER_IMPL;
2845  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2846  // thread_id);
2847  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2848  getThreadID(CGF, Loc)};
2849  if (auto *OMPRegionInfo =
2850  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2851  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2852  auto *Result = CGF.EmitRuntimeCall(
2854  if (EmitChecks) {
2855  // if (__kmpc_cancel_barrier()) {
2856  // exit from construct;
2857  // }
2858  auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2859  auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2860  auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2861  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2862  CGF.EmitBlock(ExitBB);
2863  // exit from construct;
2864  auto CancelDestination =
2865  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2866  CGF.EmitBranchThroughCleanup(CancelDestination);
2867  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2868  }
2869  return;
2870  }
2871  }
2873 }
2874 
2875 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
// NOTE(review): the static signature line 2876 and the dynamic/guided return
// lines 2883/2885 are elided by the doxygen rendering.  Each clause kind maps
// to the ordered or unordered libomp schedule constant; 'unknown' defaults to
// static.
2877  bool Chunked, bool Ordered) {
2878  switch (ScheduleKind) {
2879  case OMPC_SCHEDULE_static:
2880  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2881  : (Ordered ? OMP_ord_static : OMP_sch_static);
2882  case OMPC_SCHEDULE_dynamic:
2884  case OMPC_SCHEDULE_guided:
2886  case OMPC_SCHEDULE_runtime:
2887  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2888  case OMPC_SCHEDULE_auto:
2889  return Ordered ? OMP_ord_auto : OMP_sch_auto;
2890  case OMPC_SCHEDULE_unknown:
2891  assert(!Chunked && "chunk was specified but schedule kind not known");
2892  return Ordered ? OMP_ord_static : OMP_sch_static;
2893  }
2894  llvm_unreachable("Unexpected runtime schedule");
2895 }
2896 
2897 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2898 static OpenMPSchedType
// NOTE(review): parameter line 2899 and the return line 2901 (chunked vs
// non-chunked dist_sch_static) are elided by the doxygen rendering.
2900  // only static is allowed for dist_schedule
2902 }
2903 
// CGOpenMPRuntime::isStaticNonchunked (loop-schedule overload): true iff the
// clause maps to plain unordered OMP_sch_static.  Signature line 2904 elided.
2905  bool Chunked) const {
2906  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2907  return Schedule == OMP_sch_static;
2908 }
2909 
// dist_schedule overload: true iff the clause maps to non-chunked
// OMP_dist_sch_static.  Signature line 2910 elided by the doxygen rendering.
2911  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2912  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2913  return Schedule == OMP_dist_sch_static;
2914 }
2915 
2916 
// Presumably CGOpenMPRuntime::isDynamic (signature line 2917 elided): true
// when the schedule is anything other than static, i.e. the dynamic dispatch
// runtime path must be used.
2918  auto Schedule =
2919  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2920  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2921  return Schedule != OMP_sch_static;
2922 }
2923 
// Static helper (signature lines 2924-2926 elided): ORs the libomp
// monotonic/nonmonotonic/simd modifier bits from the two schedule-clause
// modifiers into the base schedule value.  M2 is processed after M1, so a
// modifier in M2 overwrites one set from M1; the 'simd' modifier lines
// (2937, 2952) and the default cases (2939-2940, 2954-2955) are elided by the
// doxygen rendering.
2927  int Modifier = 0;
2928  switch (M1) {
2929  case OMPC_SCHEDULE_MODIFIER_monotonic:
2930  Modifier = OMP_sch_modifier_monotonic;
2931  break;
2932  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2933  Modifier = OMP_sch_modifier_nonmonotonic;
2934  break;
2935  case OMPC_SCHEDULE_MODIFIER_simd:
2936  if (Schedule == OMP_sch_static_chunked)
2938  break;
2941  break;
2942  }
2943  switch (M2) {
2944  case OMPC_SCHEDULE_MODIFIER_monotonic:
2945  Modifier = OMP_sch_modifier_monotonic;
2946  break;
2947  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2948  Modifier = OMP_sch_modifier_nonmonotonic;
2949  break;
2950  case OMPC_SCHEDULE_MODIFIER_simd:
2951  if (Schedule == OMP_sch_static_chunked)
2953  break;
2956  break;
2957  }
2958  return Schedule | Modifier;
2959 }
2960 
// CGOpenMPRuntime::emitForDispatchInit: emits __kmpc_dispatch_init_(4|8)[u]
// for dynamically scheduled loops.  NOTE(review): the signature line 2961 and
// the Schedule computation line 2967 are elided by the doxygen rendering.
2962  CodeGenFunction &CGF, SourceLocation Loc,
2963  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2964  bool Ordered, const DispatchRTInput &DispatchValues) {
2965  if (!CGF.HaveInsertPoint())
2966  return;
2968  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2969  assert(Ordered ||
2970  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2971  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2972  Schedule != OMP_sch_static_balanced_chunked));
2973  // Call __kmpc_dispatch_init(
2974  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2975  // kmp_int[32|64] lower, kmp_int[32|64] upper,
2976  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2977 
2978  // If the Chunk was not specified in the clause - use default value 1.
2979  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2980  : CGF.Builder.getIntN(IVSize, 1);
2981  llvm::Value *Args[] = {
2982  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2983  CGF.Builder.getInt32(addMonoNonMonoModifier(
2984  Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2985  DispatchValues.LB, // Lower
2986  DispatchValues.UB, // Upper
2987  CGF.Builder.getIntN(IVSize, 1), // Stride
2988  Chunk // Chunk
2989  };
2990  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2991 }
2992 
// Static helper shared by emitForStaticInit and emitDistributeStaticInit:
// assembles the argument list and emits __kmpc_for_static_init_(4|8)[u].
// NOTE(review): the signature line 2993 and the modifier-parameter line 2996
// are elided by the doxygen rendering.
2994  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2995  llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
2997  const CGOpenMPRuntime::StaticRTInput &Values) {
2998  if (!CGF.HaveInsertPoint())
2999  return;
3000 
3001  assert(!Values.Ordered);
3002  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3003  Schedule == OMP_sch_static_balanced_chunked ||
3004  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3005  Schedule == OMP_dist_sch_static ||
3006  Schedule == OMP_dist_sch_static_chunked);
3007 
3008  // Call __kmpc_for_static_init(
3009  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3010  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3011  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3012  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3013  llvm::Value *Chunk = Values.Chunk;
3014  if (Chunk == nullptr) {
3015  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3016  Schedule == OMP_dist_sch_static) &&
3017  "expected static non-chunked schedule");
3018  // If the Chunk was not specified in the clause - use default value 1.
3019  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3020  } else {
3021  assert((Schedule == OMP_sch_static_chunked ||
3022  Schedule == OMP_sch_static_balanced_chunked ||
3023  Schedule == OMP_ord_static_chunked ||
3024  Schedule == OMP_dist_sch_static_chunked) &&
3025  "expected static chunked schedule");
3026  }
3027  llvm::Value *Args[] = {
3028  UpdateLocation,
3029  ThreadId,
3030  CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3031  M2)), // Schedule type
3032  Values.IL.getPointer(), // &isLastIter
3033  Values.LB.getPointer(), // &LB
3034  Values.UB.getPointer(), // &UB
3035  Values.ST.getPointer(), // &Stride
3036  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3037  Chunk // Chunk
3038  };
3039  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3040 }
3041 
// CGOpenMPRuntime::emitForStaticInit: static-init entry point for worksharing
// loops and sections; selects the WORK_LOOP vs WORK_SECTIONS ident flag and
// delegates to emitForStaticInitCall.  NOTE(review): lines 3042 (signature)
// and 3057 (createForStaticInitFunction call) are elided by the doxygen
// rendering.
3043  SourceLocation Loc,
3044  OpenMPDirectiveKind DKind,
3045  const OpenMPScheduleTy &ScheduleKind,
3046  const StaticRTInput &Values) {
3047  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3048  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3049  assert(isOpenMPWorksharingDirective(DKind) &&
3050  "Expected loop-based or sections-based directive.");
3051  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3052  isOpenMPLoopDirective(DKind)
3053  ? OMP_IDENT_WORK_LOOP
3054  : OMP_IDENT_WORK_SECTIONS);
3055  auto *ThreadId = getThreadID(CGF, Loc);
3056  auto *StaticInitFunction =
3058  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3059  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3060 }
3061 
// CGOpenMPRuntime::emitDistributeStaticInit: same as emitForStaticInit but
// for 'distribute' loops — uses the WORK_DISTRIBUTE ident flag and no
// schedule modifiers.  NOTE(review): lines 3062 (signature) and 3075 (the
// second 'unknown' modifier argument) are elided by the doxygen rendering.
3063  CodeGenFunction &CGF, SourceLocation Loc,
3064  OpenMPDistScheduleClauseKind SchedKind,
3065  const CGOpenMPRuntime::StaticRTInput &Values) {
3066  OpenMPSchedType ScheduleNum =
3067  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3068  auto *UpdatedLocation =
3069  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3070  auto *ThreadId = getThreadID(CGF, Loc);
3071  auto *StaticInitFunction =
3072  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3073  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3074  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3076 }
3077 
// CGOpenMPRuntime::emitForStaticFinish: emits __kmpc_for_static_fini with the
// ident flag matching the directive kind (distribute / loop / sections).
// NOTE(review): lines 3078 (signature), 3086 and 3092 are elided by the
// doxygen rendering.
3079  SourceLocation Loc,
3080  OpenMPDirectiveKind DKind) {
3081  if (!CGF.HaveInsertPoint())
3082  return;
3083  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3084  llvm::Value *Args[] = {
3085  emitUpdateLocation(CGF, Loc,
3087  ? OMP_IDENT_WORK_DISTRIBUTE
3088  : isOpenMPLoopDirective(DKind)
3089  ? OMP_IDENT_WORK_LOOP
3090  : OMP_IDENT_WORK_SECTIONS),
3091  getThreadID(CGF, Loc)};
3093  Args);
3094 }
3095 
// CGOpenMPRuntime::emitForOrderedIterationEnd: emits the per-iteration
// __kmpc_dispatch_fini_(4|8)[u] call for ordered dynamic loops.  Signature
// line 3096 is elided by the doxygen rendering.
3097  SourceLocation Loc,
3098  unsigned IVSize,
3099  bool IVSigned) {
3100  if (!CGF.HaveInsertPoint())
3101  return;
3102  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3103  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3104  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3105 }
3106 
// CGOpenMPRuntime::emitForNext: emits __kmpc_dispatch_next_(4|8)[u] to fetch
// the next chunk of a dynamically scheduled loop, converting the i32 result
// to a bool ("more work remains").  Signature line 3107 is elided by the
// doxygen rendering.
3108  SourceLocation Loc, unsigned IVSize,
3109  bool IVSigned, Address IL,
3110  Address LB, Address UB,
3111  Address ST) {
3112  // Call __kmpc_dispatch_next(
3113  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3114  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3115  // kmp_int[32|64] *p_stride);
3116  llvm::Value *Args[] = {
3117  emitUpdateLocation(CGF, Loc),
3118  getThreadID(CGF, Loc),
3119  IL.getPointer(), // &isLastIter
3120  LB.getPointer(), // &Lower
3121  UB.getPointer(), // &Upper
3122  ST.getPointer() // &Stride
3123  };
3124  llvm::Value *Call =
3125  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3126  return CGF.EmitScalarConversion(
3127  Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
3128  CGF.getContext().BoolTy, Loc);
3129 }
3130 
// CGOpenMPRuntime::emitNumThreadsClause: emits __kmpc_push_num_threads with
// the clause value truncated/extended to i32.  NOTE(review): lines 3131
// (signature) and 3140 (the EmitRuntimeCall) are elided by the doxygen
// rendering.
3132  llvm::Value *NumThreads,
3133  SourceLocation Loc) {
3134  if (!CGF.HaveInsertPoint())
3135  return;
3136  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3137  llvm::Value *Args[] = {
3138  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3139  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3141  Args);
3142 }
3143 
// CGOpenMPRuntime::emitProcBindClause: maps the clause's proc_bind kind to
// the libomp kmp_proc_bind_t numbering and emits __kmpc_push_proc_bind.
// NOTE(review): lines 3144 (signature), 3169 (the unsupported-kind case
// labels) and 3176 (the EmitRuntimeCall) are elided by the doxygen rendering.
3145  OpenMPProcBindClauseKind ProcBind,
3146  SourceLocation Loc) {
3147  if (!CGF.HaveInsertPoint())
3148  return;
3149  // Constants for proc bind value accepted by the runtime.
3150  enum ProcBindTy {
3151  ProcBindFalse = 0,
3152  ProcBindTrue,
3153  ProcBindMaster,
3154  ProcBindClose,
3155  ProcBindSpread,
3156  ProcBindIntel,
3157  ProcBindDefault
3158  } RuntimeProcBind;
3159  switch (ProcBind) {
3160  case OMPC_PROC_BIND_master:
3161  RuntimeProcBind = ProcBindMaster;
3162  break;
3163  case OMPC_PROC_BIND_close:
3164  RuntimeProcBind = ProcBindClose;
3165  break;
3166  case OMPC_PROC_BIND_spread:
3167  RuntimeProcBind = ProcBindSpread;
3168  break;
3170  llvm_unreachable("Unsupported proc_bind value.");
3171  }
3172  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3173  llvm::Value *Args[] = {
3174  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3175  llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3177 }
3178 
// CGOpenMPRuntime::emitFlush: emits __kmpc_flush(loc).  NOTE(review): lines
// 3179 (signature) and 3184 (the EmitRuntimeCall) are elided by the doxygen
// rendering.
3180  SourceLocation Loc) {
3181  if (!CGF.HaveInsertPoint())
3182  return;
3183  // Build call void __kmpc_flush(ident_t *loc)
3185  emitUpdateLocation(CGF, Loc));
3186 }
3187 
3188 namespace {
3189 /// \brief Indexes of fields for type kmp_task_t.
// NOTE(review): the enum header line (3190, presumably "enum KmpTaskTFields
// {") is elided by the doxygen rendering.  Enumerator order must match the
// field layout of the runtime's kmp_task_t record built elsewhere in this
// file.
3191  /// \brief List of shared variables.
3192  KmpTaskTShareds,
3193  /// \brief Task routine.
3194  KmpTaskTRoutine,
3195  /// \brief Partition id for the untied tasks.
3196  KmpTaskTPartId,
3197  /// Function with call of destructors for private variables.
3198  Data1,
3199  /// Task priority.
3200  Data2,
3201  /// (Taskloops only) Lower bound.
3202  KmpTaskTLowerBound,
3203  /// (Taskloops only) Upper bound.
3204  KmpTaskTUpperBound,
3205  /// (Taskloops only) Stride.
3206  KmpTaskTStride,
3207  /// (Taskloops only) Is last iteration flag.
3208  KmpTaskTLastIter,
3209  /// (Taskloops only) Reduction data.
3210  KmpTaskTReductions,
3211 };
3212 } // anonymous namespace
3213 
3214 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3215  // FIXME: Add other entries type when they become supported.
3216  return OffloadEntriesTargetRegion.empty();
3217 }
3218 
3219 /// \brief Initialize target region entry.
3220 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3221  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3222  StringRef ParentName, unsigned LineNum,
3223  unsigned Order) {
3224  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3225  "only required for the device "
3226  "code generation.");
3227  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3228  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3229  /*Flags=*/0);
3230  ++OffloadingEntriesNum;
3231 }
3232 
3233 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3234  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3235  StringRef ParentName, unsigned LineNum,
3236  llvm::Constant *Addr, llvm::Constant *ID,
3237  int32_t Flags) {
3238  // If we are emitting code for a target, the entry is already initialized,
3239  // only has to be registered.
3240  if (CGM.getLangOpts().OpenMPIsDevice) {
3241  assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3242  "Entry must exist.");
3243  auto &Entry =
3244  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3245  assert(Entry.isValid() && "Entry not initialized!");
3246  Entry.setAddress(Addr);
3247  Entry.setID(ID);
3248  Entry.setFlags(Flags);
3249  return;
3250  } else {
3251  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags);
3252  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3253  }
3254 }
3255 
3256 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3257  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3258  unsigned LineNum) const {
3259  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3260  if (PerDevice == OffloadEntriesTargetRegion.end())
3261  return false;
3262  auto PerFile = PerDevice->second.find(FileID);
3263  if (PerFile == PerDevice->second.end())
3264  return false;
3265  auto PerParentName = PerFile->second.find(ParentName);
3266  if (PerParentName == PerFile->second.end())
3267  return false;
3268  auto PerLine = PerParentName->second.find(LineNum);
3269  if (PerLine == PerParentName->second.end())
3270  return false;
3271  // Fail if this entry is already registered.
3272  if (PerLine->second.getAddress() || PerLine->second.getID())
3273  return false;
3274  return true;
3275 }
3276 
3277 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3278  const OffloadTargetRegionEntryInfoActTy &Action) {
3279  // Scan all target region entries and perform the provided action.
3280  for (auto &D : OffloadEntriesTargetRegion)
3281  for (auto &F : D.second)
3282  for (auto &P : F.second)
3283  for (auto &L : P.second)
3284  Action(D.first, F.first, P.first(), L.first, L.second);
3285 }
3286 
3287 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
3288 /// \a Codegen. This is used to emit the two functions that register and
3289 /// unregister the descriptor of the current compilation unit.
3290 static llvm::Function *
// NOTE(review): the parameter line (3291, presumably
// "createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef
// Name,") is elided by the doxygen rendering.  The generated function takes a
// single ignored void* argument.
3292  const RegionCodeGenTy &Codegen) {
3293  auto &C = CGM.getContext();
3294  FunctionArgList Args;
3295  ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3296  Args.push_back(&DummyPtr);
3297 
3298  CodeGenFunction CGF(CGM);
3299  auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3300  auto FTy = CGM.getTypes().GetFunctionType(FI);
3301  auto *Fn =
3302  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
3303  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
3304  Codegen(CGF);
3305  CGF.FinishFunction();
3306  return Fn;
3307 }
3308 
/// \brief Build the host-side registration for the OpenMP offloading binary
/// descriptor: creates the __tgt_bin_desc constant describing every device
/// image plus the host entries table, and emits
/// .omp_offloading.descriptor_reg / .omp_offloading.descriptor_unreg
/// functions that (un)register the descriptor with the offloading runtime.
/// \return The registration function, or nullptr when compiling for the
/// device or when no offload entries were collected.
3309 llvm::Function *
3311 
3312  // If we don't have entries or if we are emitting code for the device, we
3313  // don't need to do anything.
3314  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3315  return nullptr;
3316 
3317  auto &M = CGM.getModule();
3318  auto &C = CGM.getContext();
3319 
3320  // Get list of devices we care about
3321  auto &Devices = CGM.getLangOpts().OMPTargetTriples;
3322 
3323  // We should be creating an offloading descriptor only if there are devices
3324  // specified.
3325  assert(!Devices.empty() && "No OpenMP offloading devices??");
3326 
3327  // Create the external variables that will point to the begin and end of the
3328  // host entries section. These will be defined by the linker.
3329  auto *OffloadEntryTy =
3331  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
3332  M, OffloadEntryTy, /*isConstant=*/true,
3333  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3334  ".omp_offloading.entries_begin");
3335  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
3336  M, OffloadEntryTy, /*isConstant=*/true,
3337  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3338  ".omp_offloading.entries_end");
3339 
3340  // Create all device images
  // One __tgt_device_image per device triple: the linker-defined
  // [img_start, img_end) range plus the host entries range, so the runtime
  // can relate each image back to the host table.
3341  auto *DeviceImageTy = cast<llvm::StructType>(
3343  ConstantInitBuilder DeviceImagesBuilder(CGM);
3344  auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);
3345 
3346  for (unsigned i = 0; i < Devices.size(); ++i) {
3347  StringRef T = Devices[i].getTriple();
3348  auto *ImgBegin = new llvm::GlobalVariable(
3349  M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3350  /*Initializer=*/nullptr,
3351  Twine(".omp_offloading.img_start.") + Twine(T));
3352  auto *ImgEnd = new llvm::GlobalVariable(
3353  M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3354  /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
3355 
3356  auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
3357  Dev.add(ImgBegin);
3358  Dev.add(ImgEnd);
3359  Dev.add(HostEntriesBegin);
3360  Dev.add(HostEntriesEnd);
3361  Dev.finishAndAddTo(DeviceImagesEntries);
3362  }
3363 
3364  // Create device images global array.
3365  llvm::GlobalVariable *DeviceImages =
3366  DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
3367  CGM.getPointerAlign(),
3368  /*isConstant=*/true);
3369  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3370 
3371  // This is a Zero array to be used in the creation of the constant expressions
3372  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3373  llvm::Constant::getNullValue(CGM.Int32Ty)};
3374 
3375  // Create the target region descriptor.
3376  auto *BinaryDescriptorTy = cast<llvm::StructType>(
3378  ConstantInitBuilder DescBuilder(CGM);
3379  auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
3380  DescInit.addInt(CGM.Int32Ty, Devices.size());
  // GEP [0, 0] decays the images array to a pointer to its first element.
3381  DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3382  DeviceImages,
3383  Index));
3384  DescInit.add(HostEntriesBegin);
3385  DescInit.add(HostEntriesEnd);
3386 
3387  auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
3388  CGM.getPointerAlign(),
3389  /*isConstant=*/true);
3390 
3391  // Emit code to register or unregister the descriptor at execution
3392  // startup or closing, respectively.
3393 
3394  // Create a variable to drive the registration and unregistration of the
3395  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3396  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
3397  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
3398  IdentInfo, C.CharTy, ImplicitParamDecl::Other);
3399 
  // The registration function also schedules the unregistration function to
  // run at program exit (registerGlobalDtor below).
3401  CGM, ".omp_offloading.descriptor_unreg",
3402  [&](CodeGenFunction &CGF, PrePostActionTy &) {
3404  Desc);
3405  });
3407  CGM, ".omp_offloading.descriptor_reg",
3408  [&](CodeGenFunction &CGF, PrePostActionTy &) {
3410  Desc);
3411  CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3412  });
3413  if (CGM.supportsCOMDAT()) {
3414  // It is sufficient to call registration function only once, so create a
3415  // COMDAT group for registration/unregistration functions and associated
3416  // data. That would reduce startup time and code size. Registration
3417  // function serves as a COMDAT group key.
3418  auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
3419  RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3420  RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3421  RegFn->setComdat(ComdatKey);
3422  UnRegFn->setComdat(ComdatKey);
3423  DeviceImages->setComdat(ComdatKey);
3424  Desc->setComdat(ComdatKey);
3425  }
  // NOTE(review): RegFn is presumably emitted as a global ctor by the caller
  // so registration runs at startup -- confirm at the call site.
3426  return RegFn;
3427 }
3428 
/// \brief Emit one __tgt_offload_entry global for the entry with the given
/// \p ID and \p Addr, recording the address's mangled name, \p Size and
/// \p Flags. The entry is placed in the ".omp_offloading.entries" section so
/// that the linker-defined entries_begin/entries_end symbols delimit a
/// contiguous table of such records.
3429 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
3430  llvm::Constant *Addr, uint64_t Size,
3431  int32_t Flags) {
3432  StringRef Name = Addr->getName();
3433  auto *TgtOffloadEntryType = cast<llvm::StructType>(
3435  llvm::LLVMContext &C = CGM.getModule().getContext();
3436  llvm::Module &M = CGM.getModule();
3437 
3438  // Make sure the address has the right type.
3439  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
3440 
3441  // Create constant string with the name.
3442  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3443 
3444  llvm::GlobalVariable *Str =
3445  new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
3447  ".omp_offloading.entry_name");
3448  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3449  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
3450 
3451  // We can't have any padding between symbols, so we need to have 1-byte
3452  // alignment.
3453  auto Align = CharUnits::fromQuantity(1);
3454 
3455  // Create the entry struct.
  // Field order must match __tgt_offload_entry: addr, name, size, flags,
  // reserved (see getTgtOffloadEntryQTy()).
3456  ConstantInitBuilder EntryBuilder(CGM);
3457  auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
3458  EntryInit.add(AddrPtr);
3459  EntryInit.add(StrPtr);
3460  EntryInit.addInt(CGM.SizeTy, Size);
3461  EntryInit.addInt(CGM.Int32Ty, Flags);
3462  EntryInit.addInt(CGM.Int32Ty, 0);
3463  llvm::GlobalVariable *Entry =
3464  EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
3465  Align,
3466  /*constant*/ true,
3468 
3469  // The entry has to be created in the section the linker expects it to be.
3470  Entry->setSection(".omp_offloading.entries");
3471 }
3472 
3474  // Emit the offloading entries and metadata so that the device codegen side
3475  // can easily figure out what to emit. The produced metadata looks like
3476  // this:
3477  //
3478  // !omp_offload.info = !{!1, ...}
3479  //
3480  // Right now we only generate metadata for function that contain target
3481  // regions.
3482 
3483  // If we do not have entries, we dont need to do anything.
3485  return;
3486 
3487  llvm::Module &M = CGM.getModule();
3488  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by creation order; filled in by the emitter lambda below
  // so the __tgt_offload_entry globals can be emitted in that same order.
3490  OrderedEntries(OffloadEntriesInfoManager.size());
3491 
3492  // Create the offloading info metadata node.
3493  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3494 
3495  // Auxiliary methods to create metadata values and strings.
3496  auto getMDInt = [&](unsigned v) {
3497  return llvm::ConstantAsMetadata::get(
3498  llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
3499  };
3500 
3501  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
3502 
3503  // Create function that emits metadata for each target region entry;
3504  auto &&TargetRegionMetadataEmitter = [&](
3505  unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
3508  // Generate metadata for target regions. Each entry of this metadata
3509  // contains:
3510  // - Entry 0 -> Kind of this type of metadata (0).
3511  // - Entry 1 -> Device ID of the file where the entry was identified.
3512  // - Entry 2 -> File ID of the file where the entry was identified.
3513  // - Entry 3 -> Mangled name of the function where the entry was identified.
3514  // - Entry 4 -> Line in the file where the entry was identified.
3515  // - Entry 5 -> Order the entry was created.
3516  // The first element of the metadata node is the kind.
3517  Ops.push_back(getMDInt(E.getKind()));
3518  Ops.push_back(getMDInt(DeviceID));
3519  Ops.push_back(getMDInt(FileID));
3520  Ops.push_back(getMDString(ParentName));
3521  Ops.push_back(getMDInt(Line));
3522  Ops.push_back(getMDInt(E.getOrder()));
3523 
3524  // Save this entry in the right position of the ordered entries array.
3525  OrderedEntries[E.getOrder()] = &E;
3526 
3527  // Add metadata to the named metadata node.
3528  MD->addOperand(llvm::MDNode::get(C, Ops));
3529  };
3530 
3532  TargetRegionMetadataEmitter);
3533 
  // Now emit the actual __tgt_offload_entry globals, in creation order.
3534  for (auto *E : OrderedEntries) {
3535  assert(E && "All ordered entries must exist!");
3536  if (auto *CE =
3537  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3538  E)) {
3539  assert(CE->getID() && CE->getAddress() &&
3540  "Entry ID and Addr are invalid!");
3541  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
3542  } else
3543  llvm_unreachable("Unsupported entry kind.");
3544  }
3545 }
3546 
3547 /// \brief Loads all the offload entries information from the host IR
3548 /// metadata.
3550  // If we are in target mode, load the metadata from the host IR. This code has
3551  // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3552 
3553  if (!CGM.getLangOpts().OpenMPIsDevice)
3554  return;
3555 
3556  if (CGM.getLangOpts().OMPHostIRFile.empty())
3557  return;
3558 
  // Best-effort: quietly bail out if the host IR file cannot be read or
  // parsed -- device codegen then proceeds without host entry info.
3559  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3560  if (Buf.getError())
3561  return;
3562 
  // Parse the host IR into a throwaway context; only its named metadata is
  // read, nothing from the parsed module escapes this function.
3563  llvm::LLVMContext C;
3564  auto ME = expectedToErrorOrAndEmitErrors(
3565  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3566 
3567  if (ME.getError())
3568  return;
3569 
3570  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3571  if (!MD)
3572  return;
3573 
3574  for (auto I : MD->operands()) {
3575  llvm::MDNode *MN = cast<llvm::MDNode>(I);
3576 
  // Accessors mirror the operand layout documented in
  // createOffloadEntriesAndInfoMetadata().
3577  auto getMDInt = [&](unsigned Idx) {
3578  llvm::ConstantAsMetadata *V =
3579  cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3580  return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3581  };
3582 
3583  auto getMDString = [&](unsigned Idx) {
3584  llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3585  return V->getString();
3586  };
3587 
3588  switch (getMDInt(0)) {
3589  default:
3590  llvm_unreachable("Unexpected metadata!");
3591  break;
3595  /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
3596  /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
3597  /*Order=*/getMDInt(5));
3598  break;
3599  }
3600  }
3601 }
3602 
/// Lazily build the AST and IR types for kmp_routine_entry_t, the task entry
/// callback type kmp_int32 (*)(kmp_int32, void *). The results are cached in
/// KmpRoutineEntryPtrTy / KmpRoutineEntryPtrQTy, so repeated calls are cheap.
3604  if (!KmpRoutineEntryPtrTy) {
3605  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3606  auto &C = CGM.getContext();
3607  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3609  KmpRoutineEntryPtrQTy = C.getPointerType(
3610  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3611  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3612  }
3613 }
3614 
3616  QualType FieldTy) {
  // Create an unnamed, public, non-mutable field of type FieldTy (no
  // bit-width, no in-class initializer) and append it to the declaration
  // context DC. Used to build the implicit runtime record types below.
  // Returns the new FieldDecl so callers can attach attributes to it.
3617  auto *Field = FieldDecl::Create(
3618  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3620  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3621  Field->setAccess(AS_public);
3622  DC->addDecl(Field);
3623  return Field;
3624 }
3625 
3627 
3628  // Make sure the type of the entry is already created. This is the type we
3629  // have to create:
3630  // struct __tgt_offload_entry{
3631  // void *addr; // Pointer to the offload entry info.
3632  // // (function or global)
3633  // char *name; // Name of the function or global.
3634  // size_t size; // Size of the entry info (0 if it a function).
3635  // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3636  // int32_t reserved; // Reserved, to use by the runtime library.
3637  // };
  // Built lazily on first use and cached in TgtOffloadEntryQTy.
3638  if (TgtOffloadEntryQTy.isNull()) {
3639  ASTContext &C = CGM.getContext();
3640  auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3641  RD->startDefinition();
3642  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3644  addFieldToRecordDecl(C, RD, C.getSizeType());
3646  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3648  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3649  RD->completeDefinition();
3651  }
3652  return TgtOffloadEntryQTy;
3653 }
3654 
3656  // These are the types we need to build:
3657  // struct __tgt_device_image{
3658  // void *ImageStart; // Pointer to the target code start.
3659  // void *ImageEnd; // Pointer to the target code end.
3660  // // We also add the host entries to the device image, as it may be useful
3661  // // for the target runtime to have access to that information.
3662  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
3663  // // the entries.
3664  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
3665  // // entries (non inclusive).
3666  // };
  // Built lazily on first use and cached in TgtDeviceImageQTy.
3667  if (TgtDeviceImageQTy.isNull()) {
3668  ASTContext &C = CGM.getContext();
3669  auto *RD = C.buildImplicitRecord("__tgt_device_image");
3670  RD->startDefinition();
3671  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3672  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3675  RD->completeDefinition();
3677  }
3678  return TgtDeviceImageQTy;
3679 }
3680 
3682  // struct __tgt_bin_desc{
3683  // int32_t NumDevices; // Number of devices supported.
3684  // __tgt_device_image *DeviceImages; // Arrays of device images
3685  // // (one per device).
3686  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
3687  // // entries.
3688  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
3689  // // entries (non inclusive).
3690  // };
  // Built lazily on first use and cached in TgtBinaryDescriptorQTy.
3692  ASTContext &C = CGM.getContext();
3693  auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3694  RD->startDefinition();
3696  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3700  RD->completeDefinition();
3702  }
3703  return TgtBinaryDescriptorQTy;
3704 }
3705 
3706 namespace {
3707 struct PrivateHelpersTy {
3708  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3709  const VarDecl *PrivateElemInit)
3710  : Original(Original), PrivateCopy(PrivateCopy),
3711  PrivateElemInit(PrivateElemInit) {}
3712  const VarDecl *Original;
3713  const VarDecl *PrivateCopy;
3714  const VarDecl *PrivateElemInit;
3715 };
3716 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3717 } // anonymous namespace
3718 
/// Build the implicit record ".kmp_privates.t" with one field per privatized
/// variable. Reference types are stripped to the referenced type, and any
/// AlignedAttr on the original variable is propagated to the field so the
/// layout honors the requested alignment. Returns nullptr when there are no
/// private variables.
3719 static RecordDecl *
3721  if (!Privates.empty()) {
3722  auto &C = CGM.getContext();
3723  // Build struct .kmp_privates_t. {
3724  // /* private vars */
3725  // };
3726  auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3727  RD->startDefinition();
3728  for (auto &&Pair : Privates) {
3729  auto *VD = Pair.second.Original;
3730  auto Type = VD->getType();
3731  Type = Type.getNonReferenceType();
3732  auto *FD = addFieldToRecordDecl(C, RD, Type);
3733  if (VD->hasAttrs()) {
3734  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3735  E(VD->getAttrs().end());
3736  I != E; ++I)
3737  FD->addAttr(*I);
3738  }
3739  }
3740  RD->completeDefinition();
3741  return RD;
3742  }
3743  return nullptr;
3744 }
3745 
/// Build the implicit record for kmp_task_t, including the embedded
/// kmp_cmplrdata_t union (destructor thunk or priority). Taskloop directives
/// get five extra trailing fields (lb/ub/st/liter/reductions); the full
/// layout is spelled out in the comment below. Field indices must stay in
/// sync with the KmpTaskT* enumerators used elsewhere in this file.
3746 static RecordDecl *
3748  QualType KmpInt32Ty,
3749  QualType KmpRoutineEntryPointerQTy) {
3750  auto &C = CGM.getContext();
3751  // Build struct kmp_task_t {
3752  // void * shareds;
3753  // kmp_routine_entry_t routine;
3754  // kmp_int32 part_id;
3755  // kmp_cmplrdata_t data1;
3756  // kmp_cmplrdata_t data2;
3757  // For taskloops additional fields:
3758  // kmp_uint64 lb;
3759  // kmp_uint64 ub;
3760  // kmp_int64 st;
3761  // kmp_int32 liter;
3762  // void * reductions;
3763  // };
3764  auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3765  UD->startDefinition();
3766  addFieldToRecordDecl(C, UD, KmpInt32Ty);
3767  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3768  UD->completeDefinition();
3769  QualType KmpCmplrdataTy = C.getRecordType(UD);
3770  auto *RD = C.buildImplicitRecord("kmp_task_t");
3771  RD->startDefinition();
3772  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3773  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3774  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3775  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3776  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3777  if (isOpenMPTaskLoopDirective(Kind)) {
3778  QualType KmpUInt64Ty =
3779  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3780  QualType KmpInt64Ty =
3781  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3782  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3783  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3784  addFieldToRecordDecl(C, RD, KmpInt64Ty);
3785  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3786  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3787  }
3788  RD->completeDefinition();
3789  return RD;
3790 }
3791 
/// Build kmp_task_t_with_privates: the kmp_task_t header immediately
/// followed by the .kmp_privates.t record. The privates field is omitted
/// entirely when the directive has no private variables (see
/// createPrivatesRecordDecl returning nullptr).
3792 static RecordDecl *
3794  ArrayRef<PrivateDataTy> Privates) {
3795  auto &C = CGM.getContext();
3796  // Build struct kmp_task_t_with_privates {
3797  // kmp_task_t task_data;
3798  // .kmp_privates_t. privates;
3799  // };
3800  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3801  RD->startDefinition();
3802  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3803  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3804  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3805  }
3806  RD->completeDefinition();
3807  return RD;
3808 }
3809 
3810 /// \brief Emit a proxy function which accepts kmp_task_t as the second
3811 /// argument.
3812 /// \code
3813 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3814 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3815 /// For taskloops:
3816 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3817 /// tt->reductions, tt->shareds);
3818 /// return 0;
3819 /// }
3820 /// \endcode
3821 static llvm::Value *
3823  OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3824  QualType KmpTaskTWithPrivatesPtrQTy,
3825  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3826  QualType SharedsPtrTy, llvm::Value *TaskFunction,
3827  llvm::Value *TaskPrivatesMap) {
3828  auto &C = CGM.getContext();
3829  FunctionArgList Args;
3830  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3832  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3833  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3835  Args.push_back(&GtidArg);
3836  Args.push_back(&TaskTypeArg);
3837  auto &TaskEntryFnInfo =
3838  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3839  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3840  auto *TaskEntry =
3842  ".omp_task_entry.", &CGM.getModule());
3843  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
3844  CodeGenFunction CGF(CGM);
3845  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
3846 
3847  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3848  // tt,
3849  // For taskloops:
3850  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3851  // tt->task_data.shareds);
3852  auto *GtidParam = CGF.EmitLoadOfScalar(
3853  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3854  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3855  CGF.GetAddrOfLocalVar(&TaskTypeArg),
3856  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3857  auto *KmpTaskTWithPrivatesQTyRD =
3858  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3859  LValue Base =
3860  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3861  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3862  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3863  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3864  auto *PartidParam = PartIdLVal.getPointer();
3865 
3866  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3867  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3868  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3869  CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3870  CGF.ConvertTypeForMem(SharedsPtrTy));
3871 
  // privates is the optional second field of kmp_task_t_with_privates; pass
  // a null pointer when the task has no private variables.
3872  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3873  llvm::Value *PrivatesParam;
3874  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3875  auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3876  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3877  PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3878  } else
3879  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3880 
3881  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3882  TaskPrivatesMap,
3883  CGF.Builder
3885  TDBase.getAddress(), CGF.VoidPtrTy)
3886  .getPointer()};
3887  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3888  std::end(CommonArgs));
  // Taskloops pass the loop bounds/stride/last-iteration flag/reductions
  // loaded from the kmp_task_t as extra arguments, before shareds.
3889  if (isOpenMPTaskLoopDirective(Kind)) {
3890  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3891  auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3892  auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
3893  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3894  auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3895  auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
3896  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3897  auto StLVal = CGF.EmitLValueForField(Base, *StFI);
3898  auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
3899  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3900  auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
3901  auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
3902  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3903  auto RLVal = CGF.EmitLValueForField(Base, *RFI);
3904  auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
3905  CallArgs.push_back(LBParam);
3906  CallArgs.push_back(UBParam);
3907  CallArgs.push_back(StParam);
3908  CallArgs.push_back(LIParam);
3909  CallArgs.push_back(RParam);
3910  }
3911  CallArgs.push_back(SharedsParam);
3912 
3913  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3914  CallArgs);
  // The runtime expects the entry to return 0.
3916  RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3917  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3918  CGF.FinishFunction();
3919  return TaskEntry;
3920 }
3921 
/// \brief Emit the task destructor thunk (.omp_task_destructor.): given a
/// kmp_task_t_with_privates *, push a destroy cleanup for each field of the
/// privates record whose type requires destruction. Signature matches
/// kmp_routine_entry_t: kmp_int32 (kmp_int32 gtid, task *).
3923  SourceLocation Loc,
3924  QualType KmpInt32Ty,
3925  QualType KmpTaskTWithPrivatesPtrQTy,
3926  QualType KmpTaskTWithPrivatesQTy) {
3927  auto &C = CGM.getContext();
3928  FunctionArgList Args;
3929  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3931  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3932  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3934  Args.push_back(&GtidArg);
3935  Args.push_back(&TaskTypeArg);
3936  auto &DestructorFnInfo =
3937  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3938  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3939  auto *DestructorFn =
3941  ".omp_task_destructor.", &CGM.getModule());
3942  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3943  DestructorFnInfo);
3944  CodeGenFunction CGF(CGM);
3945  CGF.disableDebugInfo();
3946  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3947  Args);
3948 
3950  CGF.GetAddrOfLocalVar(&TaskTypeArg),
3951  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3952  auto *KmpTaskTWithPrivatesQTyRD =
3953  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Skip the kmp_task_t header; FI points at the privates record.
3954  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3955  Base = CGF.EmitLValueForField(Base, *FI);
3956  for (auto *Field :
3957  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3958  if (auto DtorKind = Field->getType().isDestructedType()) {
3959  auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3960  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3961  }
3962  }
3963  CGF.FinishFunction();
3964  return DestructorFn;
3965 }
3966 
3967 /// \brief Emit a privates mapping function for correct handling of private and
3968 /// firstprivate variables.
3969 /// \code
3970 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3971 /// **noalias priv1,..., <tyn> **noalias privn) {
3972 /// *priv1 = &.privates.priv1;
3973 /// ...;
3974 /// *privn = &.privates.privn;
3975 /// }
3976 /// \endcode
3977 static llvm::Value *
3979  ArrayRef<const Expr *> PrivateVars,
3980  ArrayRef<const Expr *> FirstprivateVars,
3981  ArrayRef<const Expr *> LastprivateVars,
3982  QualType PrivatesQTy,
3983  ArrayRef<PrivateDataTy> Privates) {
3984  auto &C = CGM.getContext();
3985  FunctionArgList Args;
3986  ImplicitParamDecl TaskPrivatesArg(
3987  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3988  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3990  Args.push_back(&TaskPrivatesArg);
  // Map each privatized VarDecl to its 1-based position in Args so the field
  // order of .privates. can be matched with the right out-parameter below.
3991  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3992  unsigned Counter = 1;
3993  for (auto *E: PrivateVars) {
3994  Args.push_back(ImplicitParamDecl::Create(
3995  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3996  C.getPointerType(C.getPointerType(E->getType()))
3997  .withConst()
3998  .withRestrict(),
4000  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4001  PrivateVarsPos[VD] = Counter;
4002  ++Counter;
4003  }
4004  for (auto *E : FirstprivateVars) {
4005  Args.push_back(ImplicitParamDecl::Create(
4006  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4007  C.getPointerType(C.getPointerType(E->getType()))
4008  .withConst()
4009  .withRestrict(),
4011  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4012  PrivateVarsPos[VD] = Counter;
4013  ++Counter;
4014  }
4015  for (auto *E: LastprivateVars) {
4016  Args.push_back(ImplicitParamDecl::Create(
4017  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4018  C.getPointerType(C.getPointerType(E->getType()))
4019  .withConst()
4020  .withRestrict(),
4022  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4023  PrivateVarsPos[VD] = Counter;
4024  ++Counter;
4025  }
4026  auto &TaskPrivatesMapFnInfo =
4027  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4028  auto *TaskPrivatesMapTy =
4029  CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4030  auto *TaskPrivatesMap = llvm::Function::Create(
4031  TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
4032  ".omp_task_privates_map.", &CGM.getModule());
4033  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
4034  TaskPrivatesMapFnInfo);
  // Force the thunk to always be inlined: strip the -O0 attributes and mark
  // it always_inline.
4035  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4036  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4037  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4038  CodeGenFunction CGF(CGM);
4039  CGF.disableDebugInfo();
4040  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4041  TaskPrivatesMapFnInfo, Args);
4042 
4043  // *privi = &.privates.privi;
4045  CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4046  TaskPrivatesArg.getType()->castAs<PointerType>());
4047  auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4048  Counter = 0;
4049  for (auto *Field : PrivatesQTyRD->fields()) {
4050  auto FieldLVal = CGF.EmitLValueForField(Base, Field);
4051  auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4052  auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4053  auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4054  RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4055  CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4056  ++Counter;
4057  }
4058  CGF.FinishFunction();
4059  return TaskPrivatesMap;
4060 }
4061 
4062 static bool stable_sort_comparator(const PrivateDataTy P1,
4063  const PrivateDataTy P2) {
4064  return P1.first > P2.first;
4065 }
4066 
4067 /// Emit initialization for private variables in task-based directives.
4069  const OMPExecutableDirective &D,
4070  Address KmpTaskSharedsPtr, LValue TDBase,
4071  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4072  QualType SharedsTy, QualType SharedsPtrTy,
4073  const OMPTaskDataTy &Data,
4074  ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4075  auto &C = CGF.getContext();
  // FI initially points at the privates record inside
  // kmp_task_t_with_privates; it is re-seated to that record's first field
  // below and advanced in lockstep with the Privates array.
4076  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4077  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4078  LValue SrcBase;
  // Firstprivates are initialized from the task's shareds; set up the source
  // base only when there is something to copy from.
4079  if (!Data.FirstprivateVars.empty()) {
4080  SrcBase = CGF.MakeAddrLValue(
4082  KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4083  SharedsTy);
4084  }
4086  cast<CapturedStmt>(*D.getAssociatedStmt()));
4087  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4088  for (auto &&Pair : Privates) {
4089  auto *VD = Pair.second.PrivateCopy;
4090  auto *Init = VD->getAnyInitializer();
  // When ForDup (task_dup for taskloops), only non-trivial CXXConstructExpr
  // initializers are (re-)emitted; everything else is skipped here.
4091  if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4092  !CGF.isTrivialInitializer(Init)))) {
4093  LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
  // PrivateElemInit set => firstprivate: initialize from the shared copy.
4094  if (auto *Elem = Pair.second.PrivateElemInit) {
4095  auto *OriginalVD = Pair.second.Original;
4096  auto *SharedField = CapturesInfo.lookup(OriginalVD);
4097  auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4098  SharedRefLValue = CGF.MakeAddrLValue(
4099  Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4100  SharedRefLValue.getType(),
4102  SharedRefLValue.getTBAAInfo());
4103  QualType Type = OriginalVD->getType();
4104  if (Type->isArrayType()) {
4105  // Initialize firstprivate array.
4106  if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4107  // Perform simple memcpy.
4108  CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
4109  SharedRefLValue.getAddress(), Type);
4110  } else {
4111  // Initialize firstprivate array using element-by-element
4112  // initialization.
4114  PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4115  [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4116  Address SrcElement) {
4117  // Clean up any temporaries needed by the initialization.
4118  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4119  InitScope.addPrivate(
4120  Elem, [SrcElement]() -> Address { return SrcElement; });
4121  (void)InitScope.Privatize();
4122  // Emit initialization for single element.
4124  CGF, &CapturesInfo);
4125  CGF.EmitAnyExprToMem(Init, DestElement,
4126  Init->getType().getQualifiers(),
4127  /*IsInitializer=*/false);
4128  });
4129  }
4130  } else {
  // Non-array firstprivate: privatize Elem to alias the shared copy and
  // run the initializer expression.
4131  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4132  InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4133  return SharedRefLValue.getAddress();
4134  });
4135  (void)InitScope.Privatize();
4136  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4137  CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4138  /*capturedByInit=*/false);
4139  }
4140  } else
4141  CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4142  }
4143  ++FI;
4144  }
4145 }
4146 
4147 /// Check if duplication function is required for taskloops.
4149  ArrayRef<PrivateDataTy> Privates) {
4150  bool InitRequired = false;
4151  for (auto &&Pair : Privates) {
4152  auto *VD = Pair.second.PrivateCopy;
4153  auto *Init = VD->getAnyInitializer();
4154  InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4155  !CGF.isTrivialInitializer(Init));
4156  }
4157  return InitRequired;
4158 }
4159 
4160 
4161 /// Emit task_dup function (for initialization of
4162 /// private/firstprivate/lastprivate vars and last_iter flag)
4163 /// \code
4164 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4165 /// lastpriv) {
4166 /// // setup lastprivate flag
4167 /// task_dst->last = lastpriv;
4168 /// // could be constructor calls here...
4169 /// }
4170 /// \endcode
4171 static llvm::Value *
4173  const OMPExecutableDirective &D,
4174  QualType KmpTaskTWithPrivatesPtrQTy,
4175  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4176  const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4177  QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4178  ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4179  auto &C = CGM.getContext();
4180  FunctionArgList Args;
4181  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4182  KmpTaskTWithPrivatesPtrQTy,
4184  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4185  KmpTaskTWithPrivatesPtrQTy,
4187  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4189  Args.push_back(&DstArg);
4190  Args.push_back(&SrcArg);
4191  Args.push_back(&LastprivArg);
4192  auto &TaskDupFnInfo =
4193  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4194  auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4195  auto *TaskDup =
4197  ".omp_task_dup.", &CGM.getModule());
4198  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
4199  CodeGenFunction CGF(CGM);
4200  CGF.disableDebugInfo();
4201  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
4202 
4203  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4204  CGF.GetAddrOfLocalVar(&DstArg),
4205  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4206  // task_dst->liter = lastpriv;
4207  if (WithLastIter) {
4208  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4210  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4211  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4212  llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4213  CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4214  CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4215  }
4216 
4217  // Emit initial values for private copies (if any).
4218  assert(!Privates.empty());
4219  Address KmpTaskSharedsPtr = Address::invalid();
4220  if (!Data.FirstprivateVars.empty()) {
4221  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4222  CGF.GetAddrOfLocalVar(&SrcArg),
4223  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4225  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4226  KmpTaskSharedsPtr = Address(
4228  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4229  KmpTaskTShareds)),
4230  Loc),
4231  CGF.getNaturalTypeAlignment(SharedsTy));
4232  }
4233  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4234  SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4235  CGF.FinishFunction();
4236  return TaskDup;
4237 }
4238 
4239 /// Checks if destructor function is required to be generated.
4240 /// \return true if cleanups are required, false otherwise.
4241 static bool
4242 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4243  bool NeedsCleanup = false;
4244  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4245  auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4246  for (auto *FD : PrivateRD->fields()) {
4247  NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4248  if (NeedsCleanup)
4249  break;
4250  }
4251  return NeedsCleanup;
4252 }
4253 
/// Builds the task descriptor for a task/taskloop directive: aggregates and
/// sorts the private copies, lazily builds the kmp_task_t record types, emits
/// the privates-mapping and proxy entry functions, allocates the task via
/// __kmpc_omp_task_alloc, copies shareds, initializes private copies, and
/// fills in the destructor/priority union fields.
/// NOTE(review): this listing is missing several original lines (e.g. the
/// signature line 4255, and lines 4260, 4297/4299/4302/4310/4315, 4378,
/// 4381, 4393, 4407, 4427) — an extraction artifact. Code is byte-identical.
4254 CGOpenMPRuntime::TaskResultTy
4256  const OMPExecutableDirective &D,
4257  llvm::Value *TaskFunction, QualType SharedsTy,
4258  Address Shareds, const OMPTaskDataTy &Data) {
4259  auto &C = CGM.getContext();
4261  // Aggregate privates and sort them by the alignment.
4262  auto I = Data.PrivateCopies.begin();
4263  for (auto *E : Data.PrivateVars) {
4264  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4265  Privates.push_back(std::make_pair(
4266  C.getDeclAlign(VD),
4267  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4268  /*PrivateElemInit=*/nullptr)));
4269  ++I;
4270  }
// Firstprivates additionally carry the element-init expression variable.
4271  I = Data.FirstprivateCopies.begin();
4272  auto IElemInitRef = Data.FirstprivateInits.begin();
4273  for (auto *E : Data.FirstprivateVars) {
4274  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4275  Privates.push_back(std::make_pair(
4276  C.getDeclAlign(VD),
4277  PrivateHelpersTy(
4278  VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4279  cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
4280  ++I;
4281  ++IElemInitRef;
4282  }
4283  I = Data.LastprivateCopies.begin();
4284  for (auto *E : Data.LastprivateVars) {
4285  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4286  Privates.push_back(std::make_pair(
4287  C.getDeclAlign(VD),
4288  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4289  /*PrivateElemInit=*/nullptr)));
4290  ++I;
4291  }
// Stable sort preserves declaration order among privates that compare equal;
// the comparator is defined elsewhere in this file (presumably ordering by
// alignment, per the comment above — confirm against its definition).
4292  std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
4293  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4294  // Build type kmp_routine_entry_t (if not built yet).
4295  emitKmpRoutineEntryT(KmpInt32Ty);
4296  // Build type kmp_task_t (if not built yet).
// The taskloop branch condition and some assignment lines are missing from
// this listing; the else-branch asserts the directive is a plain task.
4298  if (SavedKmpTaskloopTQTy.isNull()) {
4300  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4301  }
4303  } else {
4304  assert(D.getDirectiveKind() == OMPD_task &&
4305  "Expected taskloop or task directive");
4306  if (SavedKmpTaskTQTy.isNull()) {
4307  SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4308  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4309  }
4311  }
4312  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4313  // Build particular struct kmp_task_t for the given task.
4314  auto *KmpTaskTWithPrivatesQTyRD =
4316  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4317  QualType KmpTaskTWithPrivatesPtrQTy =
4318  C.getPointerType(KmpTaskTWithPrivatesQTy);
4319  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4320  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
4321  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4322  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4323 
4324  // Emit initial values for private copies (if any).
// The privates-map type is taken from the outlined task function's 4th
// parameter; when there are no privates a null map pointer is passed instead.
4325  llvm::Value *TaskPrivatesMap = nullptr;
4326  auto *TaskPrivatesMapTy =
4327  std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
4328  if (!Privates.empty()) {
4329  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4330  TaskPrivatesMap = emitTaskPrivateMappingFunction(
4331  CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4332  FI->getType(), Privates);
4333  TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4334  TaskPrivatesMap, TaskPrivatesMapTy);
4335  } else {
4336  TaskPrivatesMap = llvm::ConstantPointerNull::get(
4337  cast<llvm::PointerType>(TaskPrivatesMapTy));
4338  }
4339  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4340  // kmp_task_t *tt);
4341  auto *TaskEntry = emitProxyTaskFunction(
4342  CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4343  KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4344  TaskPrivatesMap);
4345 
4346  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4347  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4348  // kmp_routine_entry_t *task_entry);
4349  // Task flags. Format is taken from
4350  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
4351  // description of kmp_tasking_flags struct.
4352  enum {
4353  TiedFlag = 0x1,
4354  FinalFlag = 0x2,
4355  DestructorsFlag = 0x8,
4356  PriorityFlag = 0x20
4357  };
4358  unsigned Flags = Data.Tied ? TiedFlag : 0;
4359  bool NeedsCleanup = false;
4360  if (!Privates.empty()) {
4361  NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4362  if (NeedsCleanup)
4363  Flags = Flags | DestructorsFlag;
4364  }
4365  if (Data.Priority.getInt())
4366  Flags = Flags | PriorityFlag;
// 'final' clause: select FinalFlag at runtime when a dynamic condition was
// captured (getPointer() non-null), otherwise fold to a compile-time flag.
4367  auto *TaskFlags =
4368  Data.Final.getPointer()
4369  ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4370  CGF.Builder.getInt32(FinalFlag),
4371  CGF.Builder.getInt32(/*C=*/0))
4372  : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4373  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4374  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4375  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
4376  getThreadID(CGF, Loc), TaskFlags,
4377  KmpTaskTWithPrivatesTySize, SharedsSize,
4379  TaskEntry, KmpRoutineEntryPtrTy)};
4380  auto *NewTask = CGF.EmitRuntimeCall(
4382  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4383  NewTask, KmpTaskTWithPrivatesPtrTy);
4384  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4385  KmpTaskTWithPrivatesQTy);
4386  LValue TDBase =
4387  CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4388  // Fill the data in the resulting kmp_task_t record.
4389  // Copy shareds if there are any.
4390  Address KmpTaskSharedsPtr = Address::invalid();
4391  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4392  KmpTaskSharedsPtr =
4394  CGF.EmitLValueForField(
4395  TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4396  KmpTaskTShareds)),
4397  Loc),
4398  CGF.getNaturalTypeAlignment(SharedsTy));
4399  CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
4400  }
4401  // Emit initial values for private copies (if any).
4402  TaskResultTy Result;
4403  if (!Privates.empty()) {
4404  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4405  SharedsTy, SharedsPtrTy, Data, Privates,
4406  /*ForDup=*/false);
// The condition's first half (line 4407) is missing from this listing; the
// visible half gates emitting the task_dup thunk on lastprivates or
// initialization being required.
4408  (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4409  Result.TaskDupFn = emitTaskDupFunction(
4410  CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4411  KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4412  /*WithLastIter=*/!Data.LastprivateVars.empty());
4413  }
4414  }
4415  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4416  enum { Priority = 0, Destructors = 1 };
4417  // Provide pointer to function with destructors for privates.
4418  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4419  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
4420  if (NeedsCleanup) {
4421  llvm::Value *DestructorFn = emitDestructorsFunction(
4422  CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4423  KmpTaskTWithPrivatesQTy);
4424  LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4425  LValue DestructorsLV = CGF.EmitLValueForField(
4426  Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4428  DestructorFn, KmpRoutineEntryPtrTy),
4429  DestructorsLV);
4430  }
4431  // Set priority.
4432  if (Data.Priority.getInt()) {
4433  LValue Data2LV = CGF.EmitLValueForField(
4434  TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4435  LValue PriorityLV = CGF.EmitLValueForField(
4436  Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4437  CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4438  }
// Package everything callers (emitTaskCall/emitTaskLoopCall) need.
4439  Result.NewTask = NewTask;
4440  Result.TaskEntry = TaskEntry;
4441  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4442  Result.TDBase = TDBase;
4443  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4444  return Result;
4445 }
4446 
/// Emits code for the task directive: initializes the task via emitTaskInit,
/// materializes the 'depend' clause into a kmp_depend_info array, then emits
/// either the enqueueing path (__kmpc_omp_task[_with_deps]) or the serialized
/// if0 path, guarded by the 'if' clause when present.
/// NOTE(review): the signature line (original 4447) and line 4577 (the
/// __kmpc_omp_task_with_deps runtime-function argument) are missing from
/// this listing — an extraction artifact. Code is byte-identical.
4448  const OMPExecutableDirective &D,
4449  llvm::Value *TaskFunction,
4450  QualType SharedsTy, Address Shareds,
4451  const Expr *IfCond,
4452  const OMPTaskDataTy &Data) {
4453  if (!CGF.HaveInsertPoint())
4454  return;
4455 
4456  TaskResultTy Result =
4457  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4458  llvm::Value *NewTask = Result.NewTask;
4459  llvm::Value *TaskEntry = Result.TaskEntry;
4460  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4461  LValue TDBase = Result.TDBase;
4462  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4463  auto &C = CGM.getContext();
4464  // Process list of dependences.
4465  Address DependenciesArray = Address::invalid();
4466  unsigned NumDependencies = Data.Dependences.size();
4467  if (NumDependencies) {
4468  // Dependence kind for RTL.
4469  enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
4470  enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4471  RecordDecl *KmpDependInfoRD;
// flags field is a bool-sized unsigned integer.
4472  QualType FlagsTy =
4473  C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4474  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
// Lazily build the kmp_depend_info record type {base_addr, len, flags}.
4475  if (KmpDependInfoTy.isNull()) {
4476  KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4477  KmpDependInfoRD->startDefinition();
4478  addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4479  addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4480  addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4481  KmpDependInfoRD->completeDefinition();
4482  KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4483  } else
4484  KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4485  CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
4486  // Define type kmp_depend_info[<Dependences.size()>];
4487  QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4488  KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
4489  ArrayType::Normal, /*IndexTypeQuals=*/0);
4490  // kmp_depend_info[<Dependences.size()>] deps;
4491  DependenciesArray =
4492  CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4493  for (unsigned i = 0; i < NumDependencies; ++i) {
4494  const Expr *E = Data.Dependences[i].second;
4495  auto Addr = CGF.EmitLValue(E);
4496  llvm::Value *Size;
4497  QualType Ty = E->getType();
// For array sections: size = (address one past the upper-bound element)
// minus the section base address, computed in bytes via ptrtoint.
4498  if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4499  LValue UpAddrLVal =
4500  CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
4501  llvm::Value *UpAddr =
4502  CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
4503  llvm::Value *LowIntPtr =
4504  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
4505  llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
4506  Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4507  } else
4508  Size = CGF.getTypeSize(Ty);
4509  auto Base = CGF.MakeAddrLValue(
4510  CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
4511  KmpDependInfoTy);
4512  // deps[i].base_addr = &<Dependences[i].second>;
4513  auto BaseAddrLVal = CGF.EmitLValueForField(
4514  Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4515  CGF.EmitStoreOfScalar(
4516  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
4517  BaseAddrLVal);
4518  // deps[i].len = sizeof(<Dependences[i].second>);
4519  auto LenLVal = CGF.EmitLValueForField(
4520  Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4521  CGF.EmitStoreOfScalar(Size, LenLVal);
4522  // deps[i].flags = <Dependences[i].first>;
4523  RTLDependenceKindTy DepKind;
4524  switch (Data.Dependences[i].first) {
4525  case OMPC_DEPEND_in:
4526  DepKind = DepIn;
4527  break;
4528  // Out and InOut dependencies must use the same code.
4529  case OMPC_DEPEND_out:
4530  case OMPC_DEPEND_inout:
4531  DepKind = DepInOut;
4532  break;
4533  case OMPC_DEPEND_source:
4534  case OMPC_DEPEND_sink:
4535  case OMPC_DEPEND_unknown:
4536  llvm_unreachable("Unknown task dependence type");
4537  }
4538  auto FlagsLVal = CGF.EmitLValueForField(
4539  Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4540  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4541  FlagsLVal);
4542  }
// Decay the array to a void* for the runtime-call argument lists below.
4543  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4544  CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
4545  CGF.VoidPtrTy);
4546  }
4547 
4548  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4549  // libcall.
4550  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4551  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4552  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4553  // list is not empty
4554  auto *ThreadID = getThreadID(CGF, Loc);
4555  auto *UpLoc = emitUpdateLocation(CGF, Loc);
4556  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4557  llvm::Value *DepTaskArgs[7];
4558  if (NumDependencies) {
4559  DepTaskArgs[0] = UpLoc;
4560  DepTaskArgs[1] = ThreadID;
4561  DepTaskArgs[2] = NewTask;
4562  DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
4563  DepTaskArgs[4] = DependenciesArray.getPointer();
4564  DepTaskArgs[5] = CGF.Builder.getInt32(0);
4565  DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4566  }
// Then-branch ('if' true or absent): enqueue the task with the runtime;
// untied tasks reset their part_id field to 0 first.
4567  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
4568  &TaskArgs,
4569  &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4570  if (!Data.Tied) {
4571  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4572  auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4573  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4574  }
4575  if (NumDependencies) {
4576  CGF.EmitRuntimeCall(
4578  } else {
4579  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
4580  TaskArgs);
4581  }
4582  // Check if parent region is untied and build return for untied task;
4583  if (auto *Region =
4584  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4585  Region->emitUntiedSwitch(CGF);
4586  };
4587 
4588  llvm::Value *DepWaitTaskArgs[6];
4589  if (NumDependencies) {
4590  DepWaitTaskArgs[0] = UpLoc;
4591  DepWaitTaskArgs[1] = ThreadID;
4592  DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
4593  DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4594  DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4595  DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4596  }
// Else-branch ('if' false): execute the task immediately — wait for
// dependences, then run the proxy entry between task_begin_if0 and
// task_complete_if0 runtime calls.
4597  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
4598  NumDependencies, &DepWaitTaskArgs,
4599  Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4600  auto &RT = CGF.CGM.getOpenMPRuntime();
4601  CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4602  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4603  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4604  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4605  // is specified.
4606  if (NumDependencies)
4607  CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
4608  DepWaitTaskArgs);
4609  // Call proxy_task_entry(gtid, new_task);
4610  auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4611  Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4612  Action.Enter(CGF);
4613  llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4614  CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4615  OutlinedFnArgs);
4616  };
4617 
4618  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4619  // kmp_task_t *new_task);
4620  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4621  // kmp_task_t *new_task);
4622  RegionCodeGenTy RCG(CodeGen);
4623  CommonActionTy Action(
4624  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
4625  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
4626  RCG.setAction(Action);
4627  RCG(CGF);
4628  };
4629 
4630  if (IfCond)
4631  emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4632  else {
4633  RegionCodeGenTy ThenRCG(ThenCodeGen);
4634  ThenRCG(CGF);
4635  }
4636 }
4637 
/// Emits a __kmpc_taskloop call for a taskloop directive: initializes the
/// task record via emitTaskInit, stores the lower-bound/upper-bound/stride
/// and reductions fields into it, then issues the runtime call with the
/// 'if' value, scheduling mode (grainsize/num_tasks), and task_dup thunk.
/// NOTE(review): the signature line (original 4638) and lines 4712/4715
/// (the final bitcast/call lines) are missing from this listing — an
/// extraction artifact. Code is byte-identical.
4639  const OMPLoopDirective &D,
4640  llvm::Value *TaskFunction,
4641  QualType SharedsTy, Address Shareds,
4642  const Expr *IfCond,
4643  const OMPTaskDataTy &Data) {
4644  if (!CGF.HaveInsertPoint())
4645  return;
4646  TaskResultTy Result =
4647  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4648  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4649  // libcall.
4650  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4651  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4652  // sched, kmp_uint64 grainsize, void *task_dup);
4653  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4654  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
// Unlike emitTaskCall, the 'if' clause is passed to the runtime as an int
// argument rather than emitted as a branch.
4655  llvm::Value *IfVal;
4656  if (IfCond) {
4657  IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4658  /*isSigned=*/true);
4659  } else
4660  IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4661 
// Initialize the task record's lb/ub/st fields from the loop directive's
// bound/stride variables' initializers.
4662  LValue LBLVal = CGF.EmitLValueForField(
4663  Result.TDBase,
4664  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4665  auto *LBVar =
4666  cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4667  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4668  /*IsInitializer=*/true);
4669  LValue UBLVal = CGF.EmitLValueForField(
4670  Result.TDBase,
4671  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4672  auto *UBVar =
4673  cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4674  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4675  /*IsInitializer=*/true);
4676  LValue StLVal = CGF.EmitLValueForField(
4677  Result.TDBase,
4678  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4679  auto *StVar =
4680  cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4681  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4682  /*IsInitializer=*/true);
4683  // Store reductions address.
4684  LValue RedLVal = CGF.EmitLValueForField(
4685  Result.TDBase,
4686  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4687  if (Data.Reductions)
4688  CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4689  else {
4690  CGF.EmitNullInitialization(RedLVal.getAddress(),
4691  CGF.getContext().VoidPtrTy);
4692  }
// 'sched' encoding for __kmpc_taskloop: 0 = no schedule clause,
// 1 = grainsize, 2 = num_tasks (Schedule.getInt() distinguishes the two).
4693  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4694  llvm::Value *TaskArgs[] = {
4695  UpLoc,
4696  ThreadID,
4697  Result.NewTask,
4698  IfVal,
4699  LBLVal.getPointer(),
4700  UBLVal.getPointer(),
4701  CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
4702  llvm::ConstantInt::getNullValue(
4703  CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
4704  llvm::ConstantInt::getSigned(
4705  CGF.IntTy, Data.Schedule.getPointer()
4706  ? Data.Schedule.getInt() ? NumTasks : Grainsize
4707  : NoSchedule),
4708  Data.Schedule.getPointer()
4709  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4710  /*isSigned=*/false)
4711  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4713  Result.TaskDupFn, CGF.VoidPtrTy)
4714  : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4716 }
4717 
4718 /// \brief Emit reduction operation for each element of array (required for
4719 /// array sections) LHS op = RHS.
4720 /// \param Type Type of array.
4721 /// \param LHSVar Variable on the left side of the reduction operation
4722 /// (references element of array in original variable).
4723 /// \param RHSVar Variable on the right side of the reduction operation
4724 /// (references element of array in original variable).
4725 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4726 /// RHSVar.
4728  CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4729  const VarDecl *RHSVar,
4730  const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4731  const Expr *, const Expr *)> &RedOpGen,
4732  const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4733  const Expr *UpExpr = nullptr) {
4734  // Perform element-by-element initialization.
4735  QualType ElementTy;
4736  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4737  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4738 
4739  // Drill down to the base element type on both arrays.
4740  auto ArrayTy = Type->getAsArrayTypeUnsafe();
4741  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4742 
4743  auto RHSBegin = RHSAddr.getPointer();
4744  auto LHSBegin = LHSAddr.getPointer();
4745  // Cast from pointer to array type to pointer to single element.
4746  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4747  // The basic structure here is a while-do loop.
4748  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4749