clang  9.0.0svn
CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "clang/CodeGen/ConstantInitBuilder.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "clang/Basic/BitmaskEnum.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38  /// Kinds of OpenMP regions used in codegen.
39  enum CGOpenMPRegionKind {
40  /// Region with outlined function for standalone 'parallel'
41  /// directive.
42  ParallelOutlinedRegion,
43  /// Region with outlined function for standalone 'task' directive.
44  TaskOutlinedRegion,
45  /// Region for constructs that do not require function outlining,
46  /// like 'for', 'sections', 'atomic' etc. directives.
47  InlinedRegion,
48  /// Region with outlined function for standalone 'target' directive.
49  TargetRegion,
50  };
51 
52  CGOpenMPRegionInfo(const CapturedStmt &CS,
53  const CGOpenMPRegionKind RegionKind,
54  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55  bool HasCancel)
56  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61  bool HasCancel)
62  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63  Kind(Kind), HasCancel(HasCancel) {}
64 
65  /// Get a variable or parameter for storing global thread id
66  /// inside OpenMP construct.
67  virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69  /// Emit the captured statement body.
70  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72  /// Get an LValue for the current ThreadID variable.
73  /// \return LValue for thread id variable. This LValue always has type int32*.
74  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82  bool hasCancel() const { return HasCancel; }
83 
84  static bool classof(const CGCapturedStmtInfo *Info) {
85  return Info->getKind() == CR_OpenMP;
86  }
87 
88  ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91  CGOpenMPRegionKind RegionKind;
92  RegionCodeGenTy CodeGen;
93  OpenMPDirectiveKind Kind;
94  bool HasCancel;
95 };
96 
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101  const RegionCodeGenTy &CodeGen,
102  OpenMPDirectiveKind Kind, bool HasCancel,
103  StringRef HelperName)
104  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105  HasCancel),
106  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108  }
109 
110  /// Get a variable or parameter for storing global thread id
111  /// inside OpenMP construct.
112  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114  /// Get the name of the capture helper.
115  StringRef getHelperName() const override { return HelperName; }
116 
117  static bool classof(const CGCapturedStmtInfo *Info) {
118  return CGOpenMPRegionInfo::classof(Info) &&
119  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120  ParallelOutlinedRegion;
121  }
122 
123 private:
124  /// A variable or parameter storing global thread id for OpenMP
125  /// constructs.
126  const VarDecl *ThreadIDVar;
127  StringRef HelperName;
128 };
129 
130 /// API for captured statement code generation in OpenMP constructs.
131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132 public:
133  class UntiedTaskActionTy final : public PrePostActionTy {
134  bool Untied;
135  const VarDecl *PartIDVar;
136  const RegionCodeGenTy UntiedCodeGen;
137  llvm::SwitchInst *UntiedSwitch = nullptr;
138 
139  public:
140  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141  const RegionCodeGenTy &UntiedCodeGen)
142  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143  void Enter(CodeGenFunction &CGF) override {
144  if (Untied) {
145  // Emit task switching point.
146  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
147  CGF.GetAddrOfLocalVar(PartIDVar),
148  PartIDVar->getType()->castAs<PointerType>());
149  llvm::Value *Res =
150  CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
152  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153  CGF.EmitBlock(DoneBB);
154  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
155  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157  CGF.Builder.GetInsertBlock());
158  emitUntiedSwitch(CGF);
159  }
160  }
161  void emitUntiedSwitch(CodeGenFunction &CGF) const {
162  if (Untied) {
163  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164  CGF.GetAddrOfLocalVar(PartIDVar),
165  PartIDVar->getType()->castAs<PointerType>());
166  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167  PartIdLVal);
168  UntiedCodeGen(CGF);
169  CodeGenFunction::JumpDest CurPoint =
170  CGF.getJumpDestInCurrentScope(".untied.next.");
171  CGF.EmitBranch(CGF.ReturnBlock.getBlock());
172  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174  CGF.Builder.GetInsertBlock());
175  CGF.EmitBranchThroughCleanup(CurPoint);
176  CGF.EmitBlock(CurPoint.getBlock());
177  }
178  }
179  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180  };
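  // Illustrative sketch only (an assumption, not code from this file): with
  // this action installed, the outlined entry for an untied task behaves
  // roughly like a function that switches on the stored part id, so the task
  // can be re-entered at its last switching point:
  //
  //   void task_entry(int *part_id) {
  //     switch (*part_id) {
  //     case 0: break;        // start from the beginning
  //     case 1: goto part1;   // resume after the first switching point
  //     default: return;      // .untied.done.
  //     }
  //     *part_id = 1;         // first part done: record the resume point...
  //     return;               // ...and hand the task back to the runtime
  //   part1:
  //     ;                     // remainder of the task body
  //   }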
181  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182  const VarDecl *ThreadIDVar,
183  const RegionCodeGenTy &CodeGen,
184  OpenMPDirectiveKind Kind, bool HasCancel,
185  const UntiedTaskActionTy &Action)
186  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187  ThreadIDVar(ThreadIDVar), Action(Action) {
188  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189  }
190 
191  /// Get a variable or parameter for storing global thread id
192  /// inside OpenMP construct.
193  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195  /// Get an LValue for the current ThreadID variable.
196  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198  /// Get the name of the capture helper.
199  StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201  void emitUntiedSwitch(CodeGenFunction &CGF) override {
202  Action.emitUntiedSwitch(CGF);
203  }
204 
205  static bool classof(const CGCapturedStmtInfo *Info) {
206  return CGOpenMPRegionInfo::classof(Info) &&
207  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208  TaskOutlinedRegion;
209  }
210 
211 private:
212  /// A variable or parameter storing global thread id for OpenMP
213  /// constructs.
214  const VarDecl *ThreadIDVar;
215  /// Action for emitting code for untied tasks.
216  const UntiedTaskActionTy &Action;
217 };
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224  const RegionCodeGenTy &CodeGen,
225  OpenMPDirectiveKind Kind, bool HasCancel)
226  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227  OldCSI(OldCSI),
228  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230  // Retrieve the value of the context parameter.
231  llvm::Value *getContextValue() const override {
232  if (OuterRegionInfo)
233  return OuterRegionInfo->getContextValue();
234  llvm_unreachable("No context value for inlined OpenMP region");
235  }
236 
237  void setContextValue(llvm::Value *V) override {
238  if (OuterRegionInfo) {
239  OuterRegionInfo->setContextValue(V);
240  return;
241  }
242  llvm_unreachable("No context value for inlined OpenMP region");
243  }
244 
245  /// Lookup the captured field decl for a variable.
246  const FieldDecl *lookup(const VarDecl *VD) const override {
247  if (OuterRegionInfo)
248  return OuterRegionInfo->lookup(VD);
249  // If there is no outer outlined region, there is no need to look it up in
250  // the list of captured variables; we can use the original one.
251  return nullptr;
252  }
253 
254  FieldDecl *getThisFieldDecl() const override {
255  if (OuterRegionInfo)
256  return OuterRegionInfo->getThisFieldDecl();
257  return nullptr;
258  }
259 
260  /// Get a variable or parameter for storing global thread id
261  /// inside OpenMP construct.
262  const VarDecl *getThreadIDVariable() const override {
263  if (OuterRegionInfo)
264  return OuterRegionInfo->getThreadIDVariable();
265  return nullptr;
266  }
267 
268  /// Get an LValue for the current ThreadID variable.
269  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270  if (OuterRegionInfo)
271  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272  llvm_unreachable("No LValue for inlined OpenMP construct");
273  }
274 
275  /// Get the name of the capture helper.
276  StringRef getHelperName() const override {
277  if (auto *OuterRegionInfo = getOldCSI())
278  return OuterRegionInfo->getHelperName();
279  llvm_unreachable("No helper name for inlined OpenMP construct");
280  }
281 
282  void emitUntiedSwitch(CodeGenFunction &CGF) override {
283  if (OuterRegionInfo)
284  OuterRegionInfo->emitUntiedSwitch(CGF);
285  }
286 
287  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289  static bool classof(const CGCapturedStmtInfo *Info) {
290  return CGOpenMPRegionInfo::classof(Info) &&
291  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292  }
293 
294  ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297  /// CodeGen info about outer OpenMP region.
298  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
299  CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For these captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310  const RegionCodeGenTy &CodeGen, StringRef HelperName)
311  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312  /*HasCancel=*/false),
313  HelperName(HelperName) {}
314 
315  /// This is unused for target regions because each starts executing
316  /// with a single thread.
317  const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319  /// Get the name of the capture helper.
320  StringRef getHelperName() const override { return HelperName; }
321 
322  static bool classof(const CGCapturedStmtInfo *Info) {
323  return CGOpenMPRegionInfo::classof(Info) &&
324  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325  }
326 
327 private:
328  StringRef HelperName;
329 };
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332  llvm_unreachable("No codegen for expressions");
333 }
334 /// API for generation of expressions captured in an innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340  OMPD_unknown,
341  /*HasCancel=*/false),
342  PrivScope(CGF) {
343  // Make sure the globals captured in the provided statement are local by
344  // using the privatization logic. We assume the same variable is not
345  // captured more than once.
346  for (const auto &C : CS.captures()) {
347  if (!C.capturesVariable() && !C.capturesVariableByCopy())
348  continue;
349 
350  const VarDecl *VD = C.getCapturedVar();
351  if (VD->isLocalVarDeclOrParm())
352  continue;
353 
354  DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355  /*RefersToEnclosingVariableOrCapture=*/false,
356  VD->getType().getNonReferenceType(), VK_LValue,
357  C.getLocation());
358  PrivScope.addPrivate(
359  VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360  }
361  (void)PrivScope.Privatize();
362  }
363 
364  /// Lookup the captured field decl for a variable.
365  const FieldDecl *lookup(const VarDecl *VD) const override {
366  if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367  return FD;
368  return nullptr;
369  }
370 
371  /// Emit the captured statement body.
372  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373  llvm_unreachable("No body for expressions");
374  }
375 
376  /// Get a variable or parameter for storing global thread id
377  /// inside OpenMP construct.
378  const VarDecl *getThreadIDVariable() const override {
379  llvm_unreachable("No thread id for expressions");
380  }
381 
382  /// Get the name of the capture helper.
383  StringRef getHelperName() const override {
384  llvm_unreachable("No helper name for expressions");
385  }
386 
387  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388 
389 private:
390  /// Private scope to capture global variables.
391  CodeGenFunction::OMPPrivateScope PrivScope;
392 };
393 
394 /// RAII for emitting code of OpenMP constructs.
395 class InlinedOpenMPRegionRAII {
396  CodeGenFunction &CGF;
397  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398  FieldDecl *LambdaThisCaptureField = nullptr;
399  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
400 
401 public:
402  /// Constructs region for combined constructs.
403  /// \param CodeGen Code generation sequence for combined directives. Includes
404  /// a list of functions used for code generation of implicitly inlined
405  /// regions.
406  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407  OpenMPDirectiveKind Kind, bool HasCancel)
408  : CGF(CGF) {
409  // Start emission for the construct.
410  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414  CGF.LambdaThisCaptureField = nullptr;
415  BlockInfo = CGF.BlockInfo;
416  CGF.BlockInfo = nullptr;
417  }
418 
419  ~InlinedOpenMPRegionRAII() {
420  // Restore original CapturedStmtInfo only if we're done with code emission.
421  auto *OldCSI =
422  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423  delete CGF.CapturedStmtInfo;
424  CGF.CapturedStmtInfo = OldCSI;
425  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427  CGF.BlockInfo = BlockInfo;
428  }
429 };
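// Illustrative usage sketch (an assumed call site, not taken from this file):
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CodeGen(CGF); // emitted while the inlined region info is installed
//   } // the destructor restores CapturedStmtInfo, lambda captures and BlockInfo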
430 
431 /// Values for bit flags used in the ident_t to describe the fields.
432 /// All enumerated elements are named and described in accordance with the code
433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
434 enum OpenMPLocationFlags : unsigned {
435  /// Use trampoline for internal microtask.
436  OMP_IDENT_IMD = 0x01,
437  /// Use c-style ident structure.
438  OMP_IDENT_KMPC = 0x02,
439  /// Atomic reduction option for kmpc_reduce.
440  OMP_ATOMIC_REDUCE = 0x10,
441  /// Explicit 'barrier' directive.
442  OMP_IDENT_BARRIER_EXPL = 0x20,
443  /// Implicit barrier in code.
444  OMP_IDENT_BARRIER_IMPL = 0x40,
445  /// Implicit barrier in 'for' directive.
446  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
447  /// Implicit barrier in 'sections' directive.
448  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
449  /// Implicit barrier in 'single' directive.
450  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
451  /// Call of __kmp_for_static_init for static loop.
452  OMP_IDENT_WORK_LOOP = 0x200,
453  /// Call of __kmp_for_static_init for sections.
454  OMP_IDENT_WORK_SECTIONS = 0x400,
455  /// Call of __kmp_for_static_init for distribute.
456  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
457  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
458 };
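// Illustrative only: these values form a bitmask, so the location flags for,
// e.g., the implicit barrier at the end of a worksharing loop are combined as
//
//   unsigned Flags = OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR; // == 0x42
//
// before being stored into the 'flags' field of the ident_t structure below.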
459 
460 /// Describes ident structure that describes a source location.
461 /// All descriptions are taken from
462 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
463 /// Original structure:
464 /// typedef struct ident {
465 /// kmp_int32 reserved_1; /**< might be used in Fortran;
466 /// see above */
467 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
468 /// KMP_IDENT_KMPC identifies this union
469 /// member */
470 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
471 /// see above */
472 ///#if USE_ITT_BUILD
473 /// /* but currently used for storing
474 /// region-specific ITT */
475 /// /* contextual information. */
476 ///#endif /* USE_ITT_BUILD */
477 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
478 /// C++ */
479 /// char const *psource; /**< String describing the source location.
480 /// The string is composed of semi-colon separated
481 /// fields which describe the source file,
482 /// the function and a pair of line numbers that
483 /// delimit the construct.
484 /// */
485 /// } ident_t;
486 enum IdentFieldIndex {
487  /// might be used in Fortran
488  IdentField_Reserved_1,
489  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
490  IdentField_Flags,
491  /// Not really used in Fortran any more
492  IdentField_Reserved_2,
493  /// Source[4] in Fortran, do not use for C++
494  IdentField_Reserved_3,
495  /// String describing the source location. The string is composed of
496  /// semi-colon separated fields which describe the source file, the function
497  /// and a pair of line numbers that delimit the construct.
498  IdentField_PSource
499 };
500 
501 /// Schedule types for 'omp for' loops (these enumerators are taken from
502 /// the enum sched_type in kmp.h).
503 enum OpenMPSchedType {
504  /// Lower bound for default (unordered) versions.
505  OMP_sch_lower = 32,
506  OMP_sch_static_chunked = 33,
507  OMP_sch_static = 34,
508  OMP_sch_dynamic_chunked = 35,
509  OMP_sch_guided_chunked = 36,
510  OMP_sch_runtime = 37,
511  OMP_sch_auto = 38,
512  /// static with chunk adjustment (e.g., simd)
513  OMP_sch_static_balanced_chunked = 45,
514  /// Lower bound for 'ordered' versions.
515  OMP_ord_lower = 64,
516  OMP_ord_static_chunked = 65,
517  OMP_ord_static = 66,
518  OMP_ord_dynamic_chunked = 67,
519  OMP_ord_guided_chunked = 68,
520  OMP_ord_runtime = 69,
521  OMP_ord_auto = 70,
522  OMP_sch_default = OMP_sch_static,
523  /// dist_schedule types
524  OMP_dist_sch_static_chunked = 91,
525  OMP_dist_sch_static = 92,
526  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
527  /// Set if the monotonic schedule modifier was present.
528  OMP_sch_modifier_monotonic = (1 << 29),
529  /// Set if the nonmonotonic schedule modifier was present.
530  OMP_sch_modifier_nonmonotonic = (1 << 30),
531 };
532 
533 enum OpenMPRTLFunction {
534  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
535  /// kmpc_micro microtask, ...);
536  OMPRTL__kmpc_fork_call,
537  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
538  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
539  OMPRTL__kmpc_threadprivate_cached,
540  /// Call to void __kmpc_threadprivate_register( ident_t *,
541  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
542  OMPRTL__kmpc_threadprivate_register,
543  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
544  OMPRTL__kmpc_global_thread_num,
545  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
546  // kmp_critical_name *crit);
547  OMPRTL__kmpc_critical,
548  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
549  // global_tid, kmp_critical_name *crit, uintptr_t hint);
550  OMPRTL__kmpc_critical_with_hint,
551  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
552  // kmp_critical_name *crit);
553  OMPRTL__kmpc_end_critical,
554  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
555  // global_tid);
556  OMPRTL__kmpc_cancel_barrier,
557  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
558  OMPRTL__kmpc_barrier,
559  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
560  OMPRTL__kmpc_for_static_fini,
561  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
562  // global_tid);
563  OMPRTL__kmpc_serialized_parallel,
564  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
565  // global_tid);
566  OMPRTL__kmpc_end_serialized_parallel,
567  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
568  // kmp_int32 num_threads);
569  OMPRTL__kmpc_push_num_threads,
570  // Call to void __kmpc_flush(ident_t *loc);
571  OMPRTL__kmpc_flush,
572  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
573  OMPRTL__kmpc_master,
574  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
575  OMPRTL__kmpc_end_master,
576  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
577  // int end_part);
578  OMPRTL__kmpc_omp_taskyield,
579  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
580  OMPRTL__kmpc_single,
581  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
582  OMPRTL__kmpc_end_single,
583  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
584  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
585  // kmp_routine_entry_t *task_entry);
586  OMPRTL__kmpc_omp_task_alloc,
587  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
588  // new_task);
589  OMPRTL__kmpc_omp_task,
590  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
591  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
592  // kmp_int32 didit);
593  OMPRTL__kmpc_copyprivate,
594  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
595  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
596  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
597  OMPRTL__kmpc_reduce,
598  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
599  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
600  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
601  // *lck);
602  OMPRTL__kmpc_reduce_nowait,
603  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
604  // kmp_critical_name *lck);
605  OMPRTL__kmpc_end_reduce,
606  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
607  // kmp_critical_name *lck);
608  OMPRTL__kmpc_end_reduce_nowait,
609  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
610  // kmp_task_t * new_task);
611  OMPRTL__kmpc_omp_task_begin_if0,
612  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
613  // kmp_task_t * new_task);
614  OMPRTL__kmpc_omp_task_complete_if0,
615  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
616  OMPRTL__kmpc_ordered,
617  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
618  OMPRTL__kmpc_end_ordered,
619  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
620  // global_tid);
621  OMPRTL__kmpc_omp_taskwait,
622  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
623  OMPRTL__kmpc_taskgroup,
624  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
625  OMPRTL__kmpc_end_taskgroup,
626  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
627  // int proc_bind);
628  OMPRTL__kmpc_push_proc_bind,
629  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
630  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
631  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
632  OMPRTL__kmpc_omp_task_with_deps,
633  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
634  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
635  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
636  OMPRTL__kmpc_omp_wait_deps,
637  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
638  // global_tid, kmp_int32 cncl_kind);
639  OMPRTL__kmpc_cancellationpoint,
640  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
641  // kmp_int32 cncl_kind);
642  OMPRTL__kmpc_cancel,
643  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
644  // kmp_int32 num_teams, kmp_int32 thread_limit);
645  OMPRTL__kmpc_push_num_teams,
646  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
647  // microtask, ...);
648  OMPRTL__kmpc_fork_teams,
649  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
650  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
651  // sched, kmp_uint64 grainsize, void *task_dup);
652  OMPRTL__kmpc_taskloop,
653  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
654  // num_dims, struct kmp_dim *dims);
655  OMPRTL__kmpc_doacross_init,
656  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
657  OMPRTL__kmpc_doacross_fini,
658  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
659  // *vec);
660  OMPRTL__kmpc_doacross_post,
661  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
662  // *vec);
663  OMPRTL__kmpc_doacross_wait,
664  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
665  // *data);
666  OMPRTL__kmpc_task_reduction_init,
667  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
668  // *d);
669  OMPRTL__kmpc_task_reduction_get_th_data,
670  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
671  OMPRTL__kmpc_alloc,
672  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
673  OMPRTL__kmpc_free,
674 
675  //
676  // Offloading related calls
677  //
678  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
679  // size);
680  OMPRTL__kmpc_push_target_tripcount,
681  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
682  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
683  // *arg_types);
684  OMPRTL__tgt_target,
685  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
686  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
687  // *arg_types);
688  OMPRTL__tgt_target_nowait,
689  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
690  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
691  // *arg_types, int32_t num_teams, int32_t thread_limit);
692  OMPRTL__tgt_target_teams,
693  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
694  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
695  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
696  OMPRTL__tgt_target_teams_nowait,
697  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
698  OMPRTL__tgt_register_lib,
699  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
700  OMPRTL__tgt_unregister_lib,
701  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
702  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
703  OMPRTL__tgt_target_data_begin,
704  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
705  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
706  // *arg_types);
707  OMPRTL__tgt_target_data_begin_nowait,
708  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
709  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
710  OMPRTL__tgt_target_data_end,
711  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
712  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
713  // *arg_types);
714  OMPRTL__tgt_target_data_end_nowait,
715  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
716  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
717  OMPRTL__tgt_target_data_update,
718  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
719  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
720  // *arg_types);
721  OMPRTL__tgt_target_data_update_nowait,
722 };
723 
724 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
725 /// region.
726 class CleanupTy final : public EHScopeStack::Cleanup {
727  PrePostActionTy *Action;
728 
729 public:
730  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
731  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
732  if (!CGF.HaveInsertPoint())
733  return;
734  Action->Exit(CGF);
735  }
736 };
737 
738 } // anonymous namespace
739 
740 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
741  CodeGenFunction::RunCleanupsScope Scope(CGF);
742  if (PrePostAction) {
743  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
744  Callback(CodeGen, CGF, *PrePostAction);
745  } else {
746  PrePostActionTy Action;
747  Callback(CodeGen, CGF, Action);
748  }
749 }
750 
751 /// Check if the combiner is a call to a UDR combiner and, if so, return the
752 /// UDR decl used for the reduction.
753 static const OMPDeclareReductionDecl *
754 getReductionInit(const Expr *ReductionOp) {
755  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
756  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
757  if (const auto *DRE =
758  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
759  if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
760  return DRD;
761  return nullptr;
762 }
763 
764 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
765  const OMPDeclareReductionDecl *DRD,
766  const Expr *InitOp,
767  Address Private, Address Original,
768  QualType Ty) {
769  if (DRD->getInitializer()) {
770  std::pair<llvm::Function *, llvm::Function *> Reduction =
771  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
772  const auto *CE = cast<CallExpr>(InitOp);
773  const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
774  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
775  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
776  const auto *LHSDRE =
777  cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
778  const auto *RHSDRE =
779  cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
780  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
781  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
782  [=]() { return Private; });
783  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
784  [=]() { return Original; });
785  (void)PrivateScope.Privatize();
786  RValue Func = RValue::get(Reduction.second);
787  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
788  CGF.EmitIgnoredExpr(InitOp);
789  } else {
790  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
791  std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
792  auto *GV = new llvm::GlobalVariable(
793  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
794  llvm::GlobalValue::PrivateLinkage, Init, Name);
795  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
796  RValue InitRVal;
797  switch (CGF.getEvaluationKind(Ty)) {
798  case TEK_Scalar:
799  InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
800  break;
801  case TEK_Complex:
802  InitRVal =
803  RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
804  break;
805  case TEK_Aggregate:
806  InitRVal = RValue::getAggregate(LV.getAddress());
807  break;
808  }
809  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
810  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
811  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
812  /*IsInitializer=*/false);
813  }
814 }
815 
816 /// Emit initialization of arrays of complex types.
817 /// \param DestAddr Address of the array.
818 /// \param Type Type of array.
819 /// \param Init Initial expression of array.
820 /// \param SrcAddr Address of the original array.
821 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
822  QualType Type, bool EmitDeclareReductionInit,
823  const Expr *Init,
824  const OMPDeclareReductionDecl *DRD,
825  Address SrcAddr = Address::invalid()) {
826  // Perform element-by-element initialization.
827  QualType ElementTy;
828 
829  // Drill down to the base element type on both arrays.
830  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
831  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
832  DestAddr =
833  CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
834  if (DRD)
835  SrcAddr =
836  CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
837 
838  llvm::Value *SrcBegin = nullptr;
839  if (DRD)
840  SrcBegin = SrcAddr.getPointer();
841  llvm::Value *DestBegin = DestAddr.getPointer();
842  // Cast from pointer to array type to pointer to single element.
843  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
844  // The basic structure here is a while-do loop.
845  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
846  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
847  llvm::Value *IsEmpty =
848  CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
849  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
850 
851  // Enter the loop body, making that address the current address.
852  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
853  CGF.EmitBlock(BodyBB);
854 
855  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
856 
857  llvm::PHINode *SrcElementPHI = nullptr;
858  Address SrcElementCurrent = Address::invalid();
859  if (DRD) {
860  SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
861  "omp.arraycpy.srcElementPast");
862  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
863  SrcElementCurrent =
864  Address(SrcElementPHI,
865  SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
866  }
867  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
868  DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
869  DestElementPHI->addIncoming(DestBegin, EntryBB);
870  Address DestElementCurrent =
871  Address(DestElementPHI,
872  DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
873 
874  // Emit copy.
875  {
876  CodeGenFunction::RunCleanupsScope InitScope(CGF);
877  if (EmitDeclareReductionInit) {
878  emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
879  SrcElementCurrent, ElementTy);
880  } else
881  CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
882  /*IsInitializer=*/false);
883  }
884 
885  if (DRD) {
886  // Shift the address forward by one element.
887  llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
888  SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
889  SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
890  }
891 
892  // Shift the address forward by one element.
893  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
894  DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
895  // Check whether we've reached the end.
896  llvm::Value *Done =
897  CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
898  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
899  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
900 
901  // Done.
902  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
903 }
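// Conceptually (illustrative sketch only; init_array and init_element are
// placeholder names), the loop emitted above performs the element-by-element
// initialization
//
//   void init_array(Ty *Dest, const Ty *Src, size_t NumElements) {
//     for (size_t I = 0; I != NumElements; ++I)
//       init_element(&Dest[I], &Src[I]); // Src is only used for UDR inits
//   }
//
// where init_element() is either the declare-reduction initializer or the
// default initializer of the element type.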
904 
905 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
906  return CGF.EmitOMPSharedLValue(E);
907 }
908 
909 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
910  const Expr *E) {
911  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
912  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
913  return LValue();
914 }
915 
916 void ReductionCodeGen::emitAggregateInitialization(
917  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
918  const OMPDeclareReductionDecl *DRD) {
919  // Emit VarDecl with copy init for arrays.
920  // Get the address of the original variable captured in current
921  // captured region.
922  const auto *PrivateVD =
923  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
924  bool EmitDeclareReductionInit =
925  DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
926  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
927  EmitDeclareReductionInit,
928  EmitDeclareReductionInit ? ClausesData[N].ReductionOp
929  : PrivateVD->getInit(),
930  DRD, SharedLVal.getAddress());
931 }
932 
933 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
934  ArrayRef<const Expr *> Privates,
935  ArrayRef<const Expr *> ReductionOps) {
936  ClausesData.reserve(Shareds.size());
937  SharedAddresses.reserve(Shareds.size());
938  Sizes.reserve(Shareds.size());
939  BaseDecls.reserve(Shareds.size());
940  auto IPriv = Privates.begin();
941  auto IRed = ReductionOps.begin();
942  for (const Expr *Ref : Shareds) {
943  ClausesData.emplace_back(Ref, *IPriv, *IRed);
944  std::advance(IPriv, 1);
945  std::advance(IRed, 1);
946  }
947 }
948 
949 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
950  assert(SharedAddresses.size() == N &&
951  "Number of generated lvalues must be exactly N.");
952  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
953  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
954  SharedAddresses.emplace_back(First, Second);
955 }
956 
957 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
958  const auto *PrivateVD =
959  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
960  QualType PrivateType = PrivateVD->getType();
961  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
962  if (!PrivateType->isVariablyModifiedType()) {
963  Sizes.emplace_back(
964  CGF.getTypeSize(
965  SharedAddresses[N].first.getType().getNonReferenceType()),
966  nullptr);
967  return;
968  }
969  llvm::Value *Size;
970  llvm::Value *SizeInChars;
971  auto *ElemType =
972  cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
973  ->getElementType();
974  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
975  if (AsArraySection) {
976  Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
977  SharedAddresses[N].first.getPointer());
978  Size = CGF.Builder.CreateNUWAdd(
979  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
980  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
981  } else {
982  SizeInChars = CGF.getTypeSize(
983  SharedAddresses[N].first.getType().getNonReferenceType());
984  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
985  }
986  Sizes.emplace_back(SizeInChars, Size);
987  CodeGenFunction::OpaqueValueMapping OpaqueMap(
988  CGF,
989  cast<OpaqueValueExpr>(
990  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
991  RValue::get(Size));
992  CGF.EmitVariablyModifiedType(PrivateType);
993 }
994 
995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
996  llvm::Value *Size) {
997  const auto *PrivateVD =
998  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
999  QualType PrivateType = PrivateVD->getType();
1000  if (!PrivateType->isVariablyModifiedType()) {
1001  assert(!Size && !Sizes[N].second &&
1002  "Size should be nullptr for non-variably modified reduction "
1003  "items.");
1004  return;
1005  }
1006  CodeGenFunction::OpaqueValueMapping OpaqueMap(
1007  CGF,
1008  cast<OpaqueValueExpr>(
1009  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1010  RValue::get(Size));
1011  CGF.EmitVariablyModifiedType(PrivateType);
1012 }
1013 
1014 void ReductionCodeGen::emitInitialization(
1015  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1016  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1017  assert(SharedAddresses.size() > N && "No variable was generated");
1018  const auto *PrivateVD =
1019  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1020  const OMPDeclareReductionDecl *DRD =
1021  getReductionInit(ClausesData[N].ReductionOp);
1022  QualType PrivateType = PrivateVD->getType();
1023  PrivateAddr = CGF.Builder.CreateElementBitCast(
1024  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1025  QualType SharedType = SharedAddresses[N].first.getType();
1026  SharedLVal = CGF.MakeAddrLValue(
1027  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1028  CGF.ConvertTypeForMem(SharedType)),
1029  SharedType, SharedAddresses[N].first.getBaseInfo(),
1030  CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1031  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1032  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1033  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1034  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1035  PrivateAddr, SharedLVal.getAddress(),
1036  SharedLVal.getType());
1037  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1038  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1039  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1040  PrivateVD->getType().getQualifiers(),
1041  /*IsInitializer=*/false);
1042  }
1043 }
1044 
1045 bool ReductionCodeGen::needCleanups(unsigned N) {
1046  const auto *PrivateVD =
1047  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1048  QualType PrivateType = PrivateVD->getType();
1049  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1050  return DTorKind != QualType::DK_none;
1051 }
1052 
1053 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1054  Address PrivateAddr) {
1055  const auto *PrivateVD =
1056  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1057  QualType PrivateType = PrivateVD->getType();
1058  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1059  if (needCleanups(N)) {
1060  PrivateAddr = CGF.Builder.CreateElementBitCast(
1061  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1062  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1063  }
1064 }
1065 
1066 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1067  LValue BaseLV) {
1068  BaseTy = BaseTy.getNonReferenceType();
1069  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1070  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1071  if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1072  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1073  } else {
1074  LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1075  BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1076  }
1077  BaseTy = BaseTy->getPointeeType();
1078  }
1079  return CGF.MakeAddrLValue(
1080  CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1081  CGF.ConvertTypeForMem(ElTy)),
1082  BaseLV.getType(), BaseLV.getBaseInfo(),
1083  CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1084 }
1085 
1086 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1087  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1088  llvm::Value *Addr) {
1089  Address Tmp = Address::invalid();
1090  Address TopTmp = Address::invalid();
1091  Address MostTopTmp = Address::invalid();
1092  BaseTy = BaseTy.getNonReferenceType();
1093  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1094  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1095  Tmp = CGF.CreateMemTemp(BaseTy);
1096  if (TopTmp.isValid())
1097  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1098  else
1099  MostTopTmp = Tmp;
1100  TopTmp = Tmp;
1101  BaseTy = BaseTy->getPointeeType();
1102  }
1103  llvm::Type *Ty = BaseLVType;
1104  if (Tmp.isValid())
1105  Ty = Tmp.getElementType();
1106  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1107  if (Tmp.isValid()) {
1108  CGF.Builder.CreateStore(Addr, Tmp);
1109  return MostTopTmp;
1110  }
1111  return Address(Addr, BaseLVAlignment);
1112 }
1113 
1114 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1115  const VarDecl *OrigVD = nullptr;
1116  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1117  const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1118  while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1119  Base = TempOASE->getBase()->IgnoreParenImpCasts();
1120  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1121  Base = TempASE->getBase()->IgnoreParenImpCasts();
1122  DE = cast<DeclRefExpr>(Base);
1123  OrigVD = cast<VarDecl>(DE->getDecl());
1124  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1125  const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1126  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1127  Base = TempASE->getBase()->IgnoreParenImpCasts();
1128  DE = cast<DeclRefExpr>(Base);
1129  OrigVD = cast<VarDecl>(DE->getDecl());
1130  }
1131  return OrigVD;
1132 }
1133 
1134 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1135  Address PrivateAddr) {
1136  const DeclRefExpr *DE;
1137  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1138  BaseDecls.emplace_back(OrigVD);
1139  LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1140  LValue BaseLValue =
1141  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1142  OriginalBaseLValue);
1143  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1144  BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1145  llvm::Value *PrivatePointer =
1146  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1147  PrivateAddr.getPointer(),
1148  SharedAddresses[N].first.getAddress().getType());
1149  llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1150  return castToBase(CGF, OrigVD->getType(),
1151  SharedAddresses[N].first.getType(),
1152  OriginalBaseLValue.getAddress().getType(),
1153  OriginalBaseLValue.getAlignment(), Ptr);
1154  }
1155  BaseDecls.emplace_back(
1156  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1157  return PrivateAddr;
1158 }
1159 
1160 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1161  const OMPDeclareReductionDecl *DRD =
1162  getReductionInit(ClausesData[N].ReductionOp);
1163  return DRD && DRD->getInitializer();
1164 }
1165 
1166 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1167  return CGF.EmitLoadOfPointerLValue(
1168  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1169  getThreadIDVariable()->getType()->castAs<PointerType>());
1170 }
1171 
1172 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1173  if (!CGF.HaveInsertPoint())
1174  return;
1175  // 1.2.2 OpenMP Language Terminology
1176  // Structured block - An executable statement with a single entry at the
1177  // top and a single exit at the bottom.
1178  // The point of exit cannot be a branch out of the structured block.
1179  // longjmp() and throw() must not violate the entry/exit criteria.
1180  CGF.EHStack.pushTerminate();
1181  CodeGen(CGF);
1182  CGF.EHStack.popTerminate();
1183 }
1184 
1185 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1186  CodeGenFunction &CGF) {
1187  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1188  getThreadIDVariable()->getType(),
1189  AlignmentSource::Decl);
1190 }
1191 
1192 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1193  QualType FieldTy) {
1194  auto *Field = FieldDecl::Create(
1195  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1196  C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1197  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1198  Field->setAccess(AS_public);
1199  DC->addDecl(Field);
1200  return Field;
1201 }
1202 
1203 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1204  StringRef Separator)
1205  : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1206  OffloadEntriesInfoManager(CGM) {
1207  ASTContext &C = CGM.getContext();
1208  RecordDecl *RD = C.buildImplicitRecord("ident_t");
1209  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1210  RD->startDefinition();
1211  // reserved_1
1212  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1213  // flags
1214  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1215  // reserved_2
1216  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1217  // reserved_3
1218  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1219  // psource
1220  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1221  RD->completeDefinition();
1222  IdentQTy = C.getRecordType(RD);
1223  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1224  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1225 
1226  loadOffloadInfoMetadata();
1227 }
1228 
1229 void CGOpenMPRuntime::clear() {
1230  InternalVars.clear();
1231  // Clean non-target variable declarations possibly used only in debug info.
1232  for (const auto &Data : EmittedNonTargetVariables) {
1233  if (!Data.getValue().pointsToAliveValue())
1234  continue;
1235  auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1236  if (!GV)
1237  continue;
1238  if (!GV->isDeclaration() || GV->getNumUses() > 0)
1239  continue;
1240  GV->eraseFromParent();
1241  }
1242 }
1243 
1244 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1245  SmallString<128> Buffer;
1246  llvm::raw_svector_ostream OS(Buffer);
1247  StringRef Sep = FirstSeparator;
1248  for (StringRef Part : Parts) {
1249  OS << Sep << Part;
1250  Sep = Separator;
1251  }
1252  return OS.str();
1253 }
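// Example (separator values assumed for illustration): with FirstSeparator "."
// and Separator "$", the call
//
//   std::string Name = getName({"omp_outlined", "wrapper"});
//
// yields ".omp_outlined$wrapper": FirstSeparator precedes the first part and
// Separator joins the remaining parts.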
1254 
1255 static llvm::Function *
1256 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1257  const Expr *CombinerInitializer, const VarDecl *In,
1258  const VarDecl *Out, bool IsCombiner) {
1259  // void .omp_combiner.(Ty *in, Ty *out);
1260  ASTContext &C = CGM.getContext();
1261  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1262  FunctionArgList Args;
1263  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1264  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1265  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1266  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1267  Args.push_back(&OmpOutParm);
1268  Args.push_back(&OmpInParm);
1269  const CGFunctionInfo &FnInfo =
1270  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1271  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1272  std::string Name = CGM.getOpenMPRuntime().getName(
1273  {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1274  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1275  Name, &CGM.getModule());
1276  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1277  Fn->removeFnAttr(llvm::Attribute::NoInline);
1278  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1279  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1280  CodeGenFunction CGF(CGM);
1281  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1282  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1283  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1284  Out->getLocation());
1285  CodeGenFunction::OMPPrivateScope Scope(CGF);
1286  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1287  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1288  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1289  .getAddress();
1290  });
1291  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1292  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1293  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1294  .getAddress();
1295  });
1296  (void)Scope.Privatize();
1297  if (!IsCombiner && Out->hasInit() &&
1298  !CGF.isTrivialInitializer(Out->getInit())) {
1299  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1300  Out->getType().getQualifiers(),
1301  /*IsInitializer=*/true);
1302  }
1303  if (CombinerInitializer)
1304  CGF.EmitIgnoredExpr(CombinerInitializer);
1305  Scope.ForceCleanup();
1306  CGF.FinishFunction();
1307  return Fn;
1308 }
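// For reference, this helper is driven by user-defined reductions such as the
// following (illustrative OpenMP source, not taken from this file):
//
//   #pragma omp declare reduction(mymin : int :                              \
//           omp_out = omp_in < omp_out ? omp_in : omp_out)                   \
//       initializer(omp_priv = 2147483647)
//
// The combiner expression is emitted as the internal '.omp_combiner.' helper
// and the initializer clause as the matching '.omp_initializer.' helper.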
1309 
1310 void CGOpenMPRuntime::emitUserDefinedReduction(
1311  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1312  if (UDRMap.count(D) > 0)
1313  return;
1314  llvm::Function *Combiner = emitCombinerOrInitializer(
1315  CGM, D->getType(), D->getCombiner(),
1316  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1317  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1318  /*IsCombiner=*/true);
1319  llvm::Function *Initializer = nullptr;
1320  if (const Expr *Init = D->getInitializer()) {
1321  Initializer = emitCombinerOrInitializer(
1322  CGM, D->getType(),
1323  D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1324  : nullptr,
1325  cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1326  cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1327  /*IsCombiner=*/false);
1328  }
1329  UDRMap.try_emplace(D, Combiner, Initializer);
1330  if (CGF) {
1331  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1332  Decls.second.push_back(D);
1333  }
1334 }
1335 
1336 std::pair<llvm::Function *, llvm::Function *>
1337 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1338  auto I = UDRMap.find(D);
1339  if (I != UDRMap.end())
1340  return I->second;
1341  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1342  return UDRMap.lookup(D);
1343 }
1344 
1345 static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1346  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1347  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1348  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1349  assert(ThreadIDVar->getType()->isPointerType() &&
1350  "thread id variable must be of type kmp_int32 *");
1351  CodeGenFunction CGF(CGM, true);
1352  bool HasCancel = false;
1353  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1354  HasCancel = OPD->hasCancel();
1355  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1356  HasCancel = OPSD->hasCancel();
1357  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1358  HasCancel = OPFD->hasCancel();
1359  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1360  HasCancel = OPFD->hasCancel();
1361  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1362  HasCancel = OPFD->hasCancel();
1363  else if (const auto *OPFD =
1364  dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1365  HasCancel = OPFD->hasCancel();
1366  else if (const auto *OPFD =
1367  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1368  HasCancel = OPFD->hasCancel();
1369  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1370  HasCancel, OutlinedHelperName);
1371  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1372  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1373 }
1374 
1375 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1376  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1377  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1378  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1379  return emitParallelOrTeamsOutlinedFunction(
1380  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1381 }
1382 
1383 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1384  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1385  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1386  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1387  return emitParallelOrTeamsOutlinedFunction(
1388  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1389 }
1390 
1391 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1392  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1393  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1394  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1395  bool Tied, unsigned &NumberOfParts) {
1396  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1397  PrePostActionTy &) {
1398  llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1399  llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1400  llvm::Value *TaskArgs[] = {
1401  UpLoc, ThreadID,
1402  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1403  TaskTVar->getType()->castAs<PointerType>())
1404  .getPointer()};
1405  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1406  };
1407  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1408  UntiedCodeGen);
1409  CodeGen.setAction(Action);
1410  assert(!ThreadIDVar->getType()->isPointerType() &&
1411  "thread id variable must be of type kmp_int32 for tasks");
1412  const OpenMPDirectiveKind Region =
1413  isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1414  : OMPD_task;
1415  const CapturedStmt *CS = D.getCapturedStmt(Region);
1416  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1417  CodeGenFunction CGF(CGM, true);
1418  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1419  InnermostKind,
1420  TD ? TD->hasCancel() : false, Action);
1421  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1422  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1423  if (!Tied)
1424  NumberOfParts = Action.getNumberOfParts();
1425  return Res;
1426 }
1427 
1428 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1429  const RecordDecl *RD, const CGRecordLayout &RL,
1430  ArrayRef<llvm::Constant *> Data) {
1431  llvm::StructType *StructTy = RL.getLLVMType();
1432  unsigned PrevIdx = 0;
1433  ConstantInitBuilder CIBuilder(CGM);
1434  auto DI = Data.begin();
1435  for (const FieldDecl *FD : RD->fields()) {
1436  unsigned Idx = RL.getLLVMFieldNo(FD);
1437  // Fill the alignment.
1438  for (unsigned I = PrevIdx; I < Idx; ++I)
1439  Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1440  PrevIdx = Idx + 1;
1441  Fields.add(*DI);
1442  ++DI;
1443  }
1444 }
1445 
1446 template <class... As>
1447 static llvm::GlobalVariable *
1448 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1449  ArrayRef<llvm::Constant *> Data, const Twine &Name,
1450  As &&... Args) {
1451  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1452  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1453  ConstantInitBuilder CIBuilder(CGM);
1454  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1455  buildStructValue(Fields, CGM, RD, RL, Data);
1456  return Fields.finishAndCreateGlobal(
1457  Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1458  std::forward<As>(Args)...);
1459 }
1460 
1461 template <typename T>
1462 static void
1463 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1464  ArrayRef<llvm::Constant *> Data,
1465  T &Parent) {
1466  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1467  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1468  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1469  buildStructValue(Fields, CGM, RD, RL, Data);
1470  Fields.finishAndAddTo(Parent);
1471 }
1472 
1473 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1474  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1475  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1476  FlagsTy FlagsKey(Flags, Reserved2Flags);
1477  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1478  if (!Entry) {
1479  if (!DefaultOpenMPPSource) {
1480  // Initialize default location for psource field of ident_t structure of
1481  // all ident_t objects. Format is ";file;function;line;column;;".
1482  // Taken from
1483  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1484  DefaultOpenMPPSource =
1485  CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1486  DefaultOpenMPPSource =
1487  llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1488  }
1489 
1490  llvm::Constant *Data[] = {
1491  llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1492  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1493  llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1494  llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1495  llvm::GlobalValue *DefaultOpenMPLocation =
1496  createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1497  llvm::GlobalValue::PrivateLinkage);
1498  DefaultOpenMPLocation->setUnnamedAddr(
1499  llvm::GlobalValue::UnnamedAddr::Global);
1500 
1501  OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1502  }
1503  return Address(Entry, Align);
1504 }
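// Sketch of the cached default location built above (illustrative only; the
// actual global is unnamed and the field values depend on the flags):
//
//   static const ident_t kmpc_default_loc = {
//       0, Flags, Reserved2Flags, 0, ";unknown;unknown;0;0;;"};
//
// One such global is shared per distinct (Flags, Reserved2Flags) pair by every
// call site that has no usable debug location.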
1505 
1506 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1507  bool AtCurrentPoint) {
1508  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1509  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1510 
1511  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1512  if (AtCurrentPoint) {
1513  Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1514  Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1515  } else {
1516  Elem.second.ServiceInsertPt =
1517  new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1518  Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1519  }
1520 }
1521 
1522 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1523  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1524  if (Elem.second.ServiceInsertPt) {
1525  llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1526  Elem.second.ServiceInsertPt = nullptr;
1527  Ptr->eraseFromParent();
1528  }
1529 }
1530 
1531 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1532  SourceLocation Loc,
1533  unsigned Flags) {
1534  Flags |= OMP_IDENT_KMPC;
1535  // If no debug info is generated, return the global default location.
1536  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1537  Loc.isInvalid())
1538  return getOrCreateDefaultLocation(Flags).getPointer();
1539 
1540  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1541 
1542  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1543  Address LocValue = Address::invalid();
1544  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1545  if (I != OpenMPLocThreadIDMap.end())
1546  LocValue = Address(I->second.DebugLoc, Align);
1547 
1548  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1549  // GetOpenMPThreadID was called before this routine.
1550  if (!LocValue.isValid()) {
1551  // Generate "ident_t .kmpc_loc.addr;"
1552  Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1553  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1554  Elem.second.DebugLoc = AI.getPointer();
1555  LocValue = AI;
1556 
1557  if (!Elem.second.ServiceInsertPt)
1558  setLocThreadIdInsertPt(CGF);
1559  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1560  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1561  CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1562  CGF.getTypeSize(IdentQTy));
1563  }
1564 
1565  // char **psource = &.kmpc_loc_<flags>.addr.psource;
1566  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1567  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1568  LValue PSource =
1569  CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1570 
1571  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1572  if (OMPDebugLoc == nullptr) {
1573  SmallString<128> Buffer2;
1574  llvm::raw_svector_ostream OS2(Buffer2);
1575  // Build debug location
1576  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1577  OS2 << ";" << PLoc.getFilename() << ";";
1578  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1579  OS2 << FD->getQualifiedNameAsString();
1580  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1581  OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1582  OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1583  }
1584  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1585  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1586 
1587  // Our callers always pass this to a runtime function, so for
1588  // convenience, go ahead and return a naked pointer.
1589  return LocValue.getPointer();
1590 }
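// Sketch of what emitUpdateLocation() produces for a construct at
// <file>:<line>:<col> inside function foo (assumed names, not part of the
// original source):
//
//   ident_t .kmpc_loc.addr = <default ident_t>;   // memcpy at the service insert point
//   .kmpc_loc.addr.psource = ";<file>;foo;<line>;<col>;;";
//
// and the address of .kmpc_loc.addr is what callers pass as 'loc' to the kmpc
// entry points.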
1591 
1592 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1593  SourceLocation Loc) {
1594  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1595 
1596  llvm::Value *ThreadID = nullptr;
1597  // Check whether we've already cached a load of the thread id in this
1598  // function.
1599  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1600  if (I != OpenMPLocThreadIDMap.end()) {
1601  ThreadID = I->second.ThreadID;
1602  if (ThreadID != nullptr)
1603  return ThreadID;
1604  }
1605  // If exceptions are enabled, do not use parameter to avoid possible crash.
1606  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1607  !CGF.getLangOpts().CXXExceptions ||
1608  CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1609  if (auto *OMPRegionInfo =
1610  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1611  if (OMPRegionInfo->getThreadIDVariable()) {
1612  // Check if this is an outlined function with the thread id passed as an argument.
1613  LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1614  ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1615  // If the value is loaded in the entry block, cache it and use it everywhere
1616  // in the function.
1617  if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1618  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1619  Elem.second.ThreadID = ThreadID;
1620  }
1621  return ThreadID;
1622  }
1623  }
1624  }
1625 
1626  // This is not an outlined function region - need to call kmp_int32
1627  // __kmpc_global_thread_num(ident_t *loc).
1628  // Generate thread id value and cache this value for use across the
1629  // function.
1630  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1631  if (!Elem.second.ServiceInsertPt)
1632  setLocThreadIdInsertPt(CGF);
1633  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1634  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1635  llvm::CallInst *Call = CGF.Builder.CreateCall(
1636  createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1637  emitUpdateLocation(CGF, Loc));
1638  Call->setCallingConv(CGF.getRuntimeCC());
1639  Elem.second.ThreadID = Call;
1640  return Call;
1641 }
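// Usage sketch (not part of the original source): outside of outlined regions
// the thread id is materialized once per function at the service insert point,
// conceptually
//
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//
// and cached in OpenMPLocThreadIDMap; inside 'parallel'/'task' outlined
// functions it is simply a load of the global_tid parameter.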
1642 
1643 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1644  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1645  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1646  clearLocThreadIdInsertPt(CGF);
1647  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1648  }
1649  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1650  for(auto *D : FunctionUDRMap[CGF.CurFn])
1651  UDRMap.erase(D);
1652  FunctionUDRMap.erase(CGF.CurFn);
1653  }
1654 }
1655 
1656 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1657  return IdentTy->getPointerTo();
1658 }
1659 
1660 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1661  if (!Kmpc_MicroTy) {
1662  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1663  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1664  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1665  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1666  }
1667  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1668 }
1669 
1670 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1671  llvm::FunctionCallee RTLFn = nullptr;
1672  switch (static_cast<OpenMPRTLFunction>(Function)) {
1673  case OMPRTL__kmpc_fork_call: {
1674  // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1675  // microtask, ...);
1676  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1677  getKmpc_MicroPointerTy()};
1678  auto *FnTy =
1679  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1680  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1681  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1682  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1683  llvm::LLVMContext &Ctx = F->getContext();
1684  llvm::MDBuilder MDB(Ctx);
1685  // Annotate the callback behavior of the __kmpc_fork_call:
1686  // - The callback callee is argument number 2 (microtask).
1687  // - The first two arguments of the callback callee are unknown (-1).
1688  // - All variadic arguments to the __kmpc_fork_call are passed to the
1689  // callback callee.
1690  F->addMetadata(
1691  llvm::LLVMContext::MD_callback,
1692  *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1693  2, {-1, -1},
1694  /* VarArgsArePassed */ true)}));
1695  }
1696  }
1697  break;
1698  }
1699  case OMPRTL__kmpc_global_thread_num: {
1700  // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1701  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1702  auto *FnTy =
1703  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1704  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1705  break;
1706  }
1707  case OMPRTL__kmpc_threadprivate_cached: {
1708  // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1709  // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1710  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1711  CGM.VoidPtrTy, CGM.SizeTy,
1712  CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1713  auto *FnTy =
1714  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1715  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1716  break;
1717  }
1718  case OMPRTL__kmpc_critical: {
1719  // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1720  // kmp_critical_name *crit);
1721  llvm::Type *TypeParams[] = {
1722  getIdentTyPointerTy(), CGM.Int32Ty,
1723  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1724  auto *FnTy =
1725  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1726  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1727  break;
1728  }
1729  case OMPRTL__kmpc_critical_with_hint: {
1730  // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1731  // kmp_critical_name *crit, uintptr_t hint);
1732  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1733  llvm::PointerType::getUnqual(KmpCriticalNameTy),
1734  CGM.IntPtrTy};
1735  auto *FnTy =
1736  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1737  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1738  break;
1739  }
1740  case OMPRTL__kmpc_threadprivate_register: {
1741  // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1742  // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1743  // typedef void *(*kmpc_ctor)(void *);
1744  auto *KmpcCtorTy =
1745  llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1746  /*isVarArg*/ false)->getPointerTo();
1747  // typedef void *(*kmpc_cctor)(void *, void *);
1748  llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1749  auto *KmpcCopyCtorTy =
1750  llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1751  /*isVarArg*/ false)
1752  ->getPointerTo();
1753  // typedef void (*kmpc_dtor)(void *);
1754  auto *KmpcDtorTy =
1755  llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1756  ->getPointerTo();
1757  llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1758  KmpcCopyCtorTy, KmpcDtorTy};
1759  auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1760  /*isVarArg*/ false);
1761  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1762  break;
1763  }
1764  case OMPRTL__kmpc_end_critical: {
1765  // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1766  // kmp_critical_name *crit);
1767  llvm::Type *TypeParams[] = {
1768  getIdentTyPointerTy(), CGM.Int32Ty,
1769  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1770  auto *FnTy =
1771  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1773  break;
1774  }
1775  case OMPRTL__kmpc_cancel_barrier: {
1776  // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1777  // global_tid);
1778  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1779  auto *FnTy =
1780  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1781  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1782  break;
1783  }
1784  case OMPRTL__kmpc_barrier: {
1785  // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1786  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1787  auto *FnTy =
1788  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1789  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1790  break;
1791  }
1792  case OMPRTL__kmpc_for_static_fini: {
1793  // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1794  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1795  auto *FnTy =
1796  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1797  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1798  break;
1799  }
1800  case OMPRTL__kmpc_push_num_threads: {
1801  // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1802  // kmp_int32 num_threads)
1803  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1804  CGM.Int32Ty};
1805  auto *FnTy =
1806  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1807  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1808  break;
1809  }
1810  case OMPRTL__kmpc_serialized_parallel: {
1811  // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1812  // global_tid);
1813  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1814  auto *FnTy =
1815  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1816  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1817  break;
1818  }
1819  case OMPRTL__kmpc_end_serialized_parallel: {
1820  // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1821  // global_tid);
1822  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1823  auto *FnTy =
1824  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1825  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1826  break;
1827  }
1828  case OMPRTL__kmpc_flush: {
1829  // Build void __kmpc_flush(ident_t *loc);
1830  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1831  auto *FnTy =
1832  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1833  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1834  break;
1835  }
1836  case OMPRTL__kmpc_master: {
1837  // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1838  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1839  auto *FnTy =
1840  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1841  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1842  break;
1843  }
1844  case OMPRTL__kmpc_end_master: {
1845  // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1846  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1847  auto *FnTy =
1848  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1849  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1850  break;
1851  }
1852  case OMPRTL__kmpc_omp_taskyield: {
1853  // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1854  // int end_part);
1855  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1856  auto *FnTy =
1857  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1858  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1859  break;
1860  }
1861  case OMPRTL__kmpc_single: {
1862  // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1863  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1864  auto *FnTy =
1865  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1866  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1867  break;
1868  }
1869  case OMPRTL__kmpc_end_single: {
1870  // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1871  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1872  auto *FnTy =
1873  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1874  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1875  break;
1876  }
1877  case OMPRTL__kmpc_omp_task_alloc: {
1878  // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1879  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1880  // kmp_routine_entry_t *task_entry);
1881  assert(KmpRoutineEntryPtrTy != nullptr &&
1882  "Type kmp_routine_entry_t must be created.");
1883  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1884  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1885  // Return void * and then cast to particular kmp_task_t type.
1886  auto *FnTy =
1887  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1888  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1889  break;
1890  }
1891  case OMPRTL__kmpc_omp_task: {
1892  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1893  // *new_task);
1894  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1895  CGM.VoidPtrTy};
1896  auto *FnTy =
1897  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1898  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1899  break;
1900  }
1901  case OMPRTL__kmpc_copyprivate: {
1902  // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1903  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1904  // kmp_int32 didit);
1905  llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1906  auto *CpyFnTy =
1907  llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1908  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1909  CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1910  CGM.Int32Ty};
1911  auto *FnTy =
1912  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1913  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1914  break;
1915  }
1916  case OMPRTL__kmpc_reduce: {
1917  // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1918  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1919  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1920  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1921  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1922  /*isVarArg=*/false);
1923  llvm::Type *TypeParams[] = {
1924  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1925  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1926  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1927  auto *FnTy =
1928  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1929  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1930  break;
1931  }
1932  case OMPRTL__kmpc_reduce_nowait: {
1933  // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1934  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1935  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1936  // *lck);
1937  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1938  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1939  /*isVarArg=*/false);
1940  llvm::Type *TypeParams[] = {
1941  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1942  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1943  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1944  auto *FnTy =
1945  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1946  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1947  break;
1948  }
1949  case OMPRTL__kmpc_end_reduce: {
1950  // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1951  // kmp_critical_name *lck);
1952  llvm::Type *TypeParams[] = {
1953  getIdentTyPointerTy(), CGM.Int32Ty,
1954  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1955  auto *FnTy =
1956  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1957  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1958  break;
1959  }
1960  case OMPRTL__kmpc_end_reduce_nowait: {
1961  // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1962  // kmp_critical_name *lck);
1963  llvm::Type *TypeParams[] = {
1964  getIdentTyPointerTy(), CGM.Int32Ty,
1965  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1966  auto *FnTy =
1967  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1968  RTLFn =
1969  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1970  break;
1971  }
1972  case OMPRTL__kmpc_omp_task_begin_if0: {
1973  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, kmp_task_t
1974  // *new_task);
1975  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1976  CGM.VoidPtrTy};
1977  auto *FnTy =
1978  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1979  RTLFn =
1980  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1981  break;
1982  }
1983  case OMPRTL__kmpc_omp_task_complete_if0: {
1984  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
1985  // kmp_task_t *new_task);
1986  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1987  CGM.VoidPtrTy};
1988  auto *FnTy =
1989  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1990  RTLFn = CGM.CreateRuntimeFunction(FnTy,
1991  /*Name=*/"__kmpc_omp_task_complete_if0");
1992  break;
1993  }
1994  case OMPRTL__kmpc_ordered: {
1995  // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1996  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1997  auto *FnTy =
1998  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1999  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2000  break;
2001  }
2002  case OMPRTL__kmpc_end_ordered: {
2003  // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2004  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2005  auto *FnTy =
2006  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2007  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2008  break;
2009  }
2010  case OMPRTL__kmpc_omp_taskwait: {
2011  // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2012  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2013  auto *FnTy =
2014  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2015  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2016  break;
2017  }
2018  case OMPRTL__kmpc_taskgroup: {
2019  // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2020  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2021  auto *FnTy =
2022  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2023  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2024  break;
2025  }
2026  case OMPRTL__kmpc_end_taskgroup: {
2027  // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2028  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2029  auto *FnTy =
2030  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2031  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2032  break;
2033  }
2034  case OMPRTL__kmpc_push_proc_bind: {
2035  // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2036  // int proc_bind)
2037  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2038  auto *FnTy =
2039  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2040  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2041  break;
2042  }
2043  case OMPRTL__kmpc_omp_task_with_deps: {
2044  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2045  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2046  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2047  llvm::Type *TypeParams[] = {
2048  getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2049  CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
2050  auto *FnTy =
2051  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2052  RTLFn =
2053  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2054  break;
2055  }
2056  case OMPRTL__kmpc_omp_wait_deps: {
2057  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2058  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2059  // kmp_depend_info_t *noalias_dep_list);
2060  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2061  CGM.Int32Ty, CGM.VoidPtrTy,
2062  CGM.Int32Ty, CGM.VoidPtrTy};
2063  auto *FnTy =
2064  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2065  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2066  break;
2067  }
2068  case OMPRTL__kmpc_cancellationpoint: {
2069  // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2070  // global_tid, kmp_int32 cncl_kind)
2071  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2072  auto *FnTy =
2073  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2074  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2075  break;
2076  }
2077  case OMPRTL__kmpc_cancel: {
2078  // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2079  // kmp_int32 cncl_kind)
2080  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2081  auto *FnTy =
2082  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2083  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2084  break;
2085  }
2086  case OMPRTL__kmpc_push_num_teams: {
2087  // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
2088  // kmp_int32 num_teams, kmp_int32 num_threads)
2089  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2090  CGM.Int32Ty};
2091  auto *FnTy =
2092  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2093  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2094  break;
2095  }
2096  case OMPRTL__kmpc_fork_teams: {
2097  // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2098  // microtask, ...);
2099  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2100  getKmpc_MicroPointerTy()};
2101  auto *FnTy =
2102  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2103  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2104  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2105  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2106  llvm::LLVMContext &Ctx = F->getContext();
2107  llvm::MDBuilder MDB(Ctx);
2108  // Annotate the callback behavior of the __kmpc_fork_teams:
2109  // - The callback callee is argument number 2 (microtask).
2110  // - The first two arguments of the callback callee are unknown (-1).
2111  // - All variadic arguments to the __kmpc_fork_teams are passed to the
2112  // callback callee.
2113  F->addMetadata(
2114  llvm::LLVMContext::MD_callback,
2115  *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2116  2, {-1, -1},
2117  /* VarArgsArePassed */ true)}));
2118  }
2119  }
2120  break;
2121  }
2122  case OMPRTL__kmpc_taskloop: {
2123  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2124  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2125  // sched, kmp_uint64 grainsize, void *task_dup);
2126  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2127  CGM.IntTy,
2128  CGM.VoidPtrTy,
2129  CGM.IntTy,
2130  CGM.Int64Ty->getPointerTo(),
2131  CGM.Int64Ty->getPointerTo(),
2132  CGM.Int64Ty,
2133  CGM.IntTy,
2134  CGM.IntTy,
2135  CGM.Int64Ty,
2136  CGM.VoidPtrTy};
2137  auto *FnTy =
2138  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2139  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2140  break;
2141  }
2142  case OMPRTL__kmpc_doacross_init: {
2143  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2144  // num_dims, struct kmp_dim *dims);
2145  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2146  CGM.Int32Ty,
2147  CGM.Int32Ty,
2148  CGM.VoidPtrTy};
2149  auto *FnTy =
2150  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2151  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2152  break;
2153  }
2154  case OMPRTL__kmpc_doacross_fini: {
2155  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2156  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2157  auto *FnTy =
2158  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2159  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2160  break;
2161  }
2162  case OMPRTL__kmpc_doacross_post: {
2163  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2164  // *vec);
2165  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2166  CGM.Int64Ty->getPointerTo()};
2167  auto *FnTy =
2168  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2169  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2170  break;
2171  }
2172  case OMPRTL__kmpc_doacross_wait: {
2173  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2174  // *vec);
2175  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2176  CGM.Int64Ty->getPointerTo()};
2177  auto *FnTy =
2178  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2179  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2180  break;
2181  }
2182  case OMPRTL__kmpc_task_reduction_init: {
2183  // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2184  // *data);
2185  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2186  auto *FnTy =
2187  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2188  RTLFn =
2189  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2190  break;
2191  }
2192  case OMPRTL__kmpc_task_reduction_get_th_data: {
2193  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2194  // *d);
2195  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2196  auto *FnTy =
2197  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2198  RTLFn = CGM.CreateRuntimeFunction(
2199  FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2200  break;
2201  }
2202  case OMPRTL__kmpc_alloc: {
2203  // Build void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2204  // al); omp_allocator_handle_t type is void *.
2205  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2206  auto *FnTy =
2207  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2208  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2209  break;
2210  }
2211  case OMPRTL__kmpc_free: {
2212  // Build void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2213  // al); omp_allocator_handle_t type is void *.
2214  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2215  auto *FnTy =
2216  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2217  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2218  break;
2219  }
2220  case OMPRTL__kmpc_push_target_tripcount: {
2221  // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2222  // size);
2223  llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2224  llvm::FunctionType *FnTy =
2225  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2226  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2227  break;
2228  }
2229  case OMPRTL__tgt_target: {
2230  // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2231  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2232  // *arg_types);
2233  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2234  CGM.VoidPtrTy,
2235  CGM.Int32Ty,
2236  CGM.VoidPtrPtrTy,
2237  CGM.VoidPtrPtrTy,
2238  CGM.SizeTy->getPointerTo(),
2239  CGM.Int64Ty->getPointerTo()};
2240  auto *FnTy =
2241  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2242  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2243  break;
2244  }
2245  case OMPRTL__tgt_target_nowait: {
2246  // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2247  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2248  // int64_t *arg_types);
2249  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2250  CGM.VoidPtrTy,
2251  CGM.Int32Ty,
2252  CGM.VoidPtrPtrTy,
2253  CGM.VoidPtrPtrTy,
2254  CGM.SizeTy->getPointerTo(),
2255  CGM.Int64Ty->getPointerTo()};
2256  auto *FnTy =
2257  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2258  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2259  break;
2260  }
2261  case OMPRTL__tgt_target_teams: {
2262  // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2263  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2264  // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2265  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2266  CGM.VoidPtrTy,
2267  CGM.Int32Ty,
2268  CGM.VoidPtrPtrTy,
2269  CGM.VoidPtrPtrTy,
2270  CGM.SizeTy->getPointerTo(),
2271  CGM.Int64Ty->getPointerTo(),
2272  CGM.Int32Ty,
2273  CGM.Int32Ty};
2274  auto *FnTy =
2275  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2276  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2277  break;
2278  }
2279  case OMPRTL__tgt_target_teams_nowait: {
2280  // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2281  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2282  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2283  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2284  CGM.VoidPtrTy,
2285  CGM.Int32Ty,
2286  CGM.VoidPtrPtrTy,
2287  CGM.VoidPtrPtrTy,
2288  CGM.SizeTy->getPointerTo(),
2289  CGM.Int64Ty->getPointerTo(),
2290  CGM.Int32Ty,
2291  CGM.Int32Ty};
2292  auto *FnTy =
2293  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2294  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2295  break;
2296  }
2297  case OMPRTL__tgt_register_lib: {
2298  // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2299  QualType ParamTy =
2300  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2301  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2302  auto *FnTy =
2303  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2304  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2305  break;
2306  }
2307  case OMPRTL__tgt_unregister_lib: {
2308  // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2309  QualType ParamTy =
2310  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2311  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2312  auto *FnTy =
2313  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2314  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2315  break;
2316  }
2317  case OMPRTL__tgt_target_data_begin: {
2318  // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2319  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2320  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2321  CGM.Int32Ty,
2322  CGM.VoidPtrPtrTy,
2323  CGM.VoidPtrPtrTy,
2324  CGM.SizeTy->getPointerTo(),
2325  CGM.Int64Ty->getPointerTo()};
2326  auto *FnTy =
2327  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2328  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2329  break;
2330  }
2331  case OMPRTL__tgt_target_data_begin_nowait: {
2332  // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2333  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2334  // *arg_types);
2335  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2336  CGM.Int32Ty,
2337  CGM.VoidPtrPtrTy,
2338  CGM.VoidPtrPtrTy,
2339  CGM.SizeTy->getPointerTo(),
2340  CGM.Int64Ty->getPointerTo()};
2341  auto *FnTy =
2342  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2343  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2344  break;
2345  }
2346  case OMPRTL__tgt_target_data_end: {
2347  // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2348  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2349  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2350  CGM.Int32Ty,
2351  CGM.VoidPtrPtrTy,
2352  CGM.VoidPtrPtrTy,
2353  CGM.SizeTy->getPointerTo(),
2354  CGM.Int64Ty->getPointerTo()};
2355  auto *FnTy =
2356  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2357  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2358  break;
2359  }
2360  case OMPRTL__tgt_target_data_end_nowait: {
2361  // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2362  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2363  // *arg_types);
2364  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2365  CGM.Int32Ty,
2366  CGM.VoidPtrPtrTy,
2367  CGM.VoidPtrPtrTy,
2368  CGM.SizeTy->getPointerTo(),
2369  CGM.Int64Ty->getPointerTo()};
2370  auto *FnTy =
2371  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2372  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2373  break;
2374  }
2375  case OMPRTL__tgt_target_data_update: {
2376  // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2377  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2378  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2379  CGM.Int32Ty,
2380  CGM.VoidPtrPtrTy,
2381  CGM.VoidPtrPtrTy,
2382  CGM.SizeTy->getPointerTo(),
2383  CGM.Int64Ty->getPointerTo()};
2384  auto *FnTy =
2385  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2386  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2387  break;
2388  }
2389  case OMPRTL__tgt_target_data_update_nowait: {
2390  // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2391  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2392  // *arg_types);
2393  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2394  CGM.Int32Ty,
2395  CGM.VoidPtrPtrTy,
2396  CGM.VoidPtrPtrTy,
2397  CGM.SizeTy->getPointerTo(),
2398  CGM.Int64Ty->getPointerTo()};
2399  auto *FnTy =
2400  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2401  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2402  break;
2403  }
2404  }
2405  assert(RTLFn && "Unable to find OpenMP runtime function");
2406  return RTLFn;
2407 }
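// For reference (sketch, not part of the original source): the !callback
// metadata attached to __kmpc_fork_call/__kmpc_fork_teams above corresponds to
// textual IR of roughly this shape, telling optimizations that argument 2 (the
// microtask) is invoked with two unknown leading arguments plus the forwarded
// variadic arguments:
//
//   declare !callback !1 void @__kmpc_fork_call(%struct.ident_t*, i32,
//                                               void (i32*, i32*, ...)*, ...)
//   !0 = !{i64 2, i64 -1, i64 -1, i1 true}
//   !1 = !{!0}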
2408 
2409 llvm::FunctionCallee
2410 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2411  assert((IVSize == 32 || IVSize == 64) &&
2412  "IV size is not compatible with the omp runtime");
2413  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2414  : "__kmpc_for_static_init_4u")
2415  : (IVSigned ? "__kmpc_for_static_init_8"
2416  : "__kmpc_for_static_init_8u");
2417  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2418  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2419  llvm::Type *TypeParams[] = {
2420  getIdentTyPointerTy(), // loc
2421  CGM.Int32Ty, // tid
2422  CGM.Int32Ty, // schedtype
2423  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2424  PtrTy, // p_lower
2425  PtrTy, // p_upper
2426  PtrTy, // p_stride
2427  ITy, // incr
2428  ITy // chunk
2429  };
2430  auto *FnTy =
2431  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2432  return CGM.CreateRuntimeFunction(FnTy, Name);
2433 }
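// Prototype sketch for the 32-bit signed variant selected above, derived from
// the TypeParams list (not part of the original source); the _4u/_8/_8u
// variants differ only in the induction-variable type:
//
//   void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid,
//                                 kmp_int32 schedtype, kmp_int32 *plastiter,
//                                 kmp_int32 *plower, kmp_int32 *pupper,
//                                 kmp_int32 *pstride, kmp_int32 incr,
//                                 kmp_int32 chunk);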
2434 
2435 llvm::FunctionCallee
2436 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2437  assert((IVSize == 32 || IVSize == 64) &&
2438  "IV size is not compatible with the omp runtime");
2439  StringRef Name =
2440  IVSize == 32
2441  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2442  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2443  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2444  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2445  CGM.Int32Ty, // tid
2446  CGM.Int32Ty, // schedtype
2447  ITy, // lower
2448  ITy, // upper
2449  ITy, // stride
2450  ITy // chunk
2451  };
2452  auto *FnTy =
2453  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2454  return CGM.CreateRuntimeFunction(FnTy, Name);
2455 }
2456 
2457 llvm::FunctionCallee
2458 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2459  assert((IVSize == 32 || IVSize == 64) &&
2460  "IV size is not compatible with the omp runtime");
2461  StringRef Name =
2462  IVSize == 32
2463  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2464  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2465  llvm::Type *TypeParams[] = {
2466  getIdentTyPointerTy(), // loc
2467  CGM.Int32Ty, // tid
2468  };
2469  auto *FnTy =
2470  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2471  return CGM.CreateRuntimeFunction(FnTy, Name);
2472 }
2473 
2474 llvm::FunctionCallee
2475 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2476  assert((IVSize == 32 || IVSize == 64) &&
2477  "IV size is not compatible with the omp runtime");
2478  StringRef Name =
2479  IVSize == 32
2480  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2481  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2482  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2483  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2484  llvm::Type *TypeParams[] = {
2485  getIdentTyPointerTy(), // loc
2486  CGM.Int32Ty, // tid
2487  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2488  PtrTy, // p_lower
2489  PtrTy, // p_upper
2490  PtrTy // p_stride
2491  };
2492  auto *FnTy =
2493  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2494  return CGM.CreateRuntimeFunction(FnTy, Name);
2495 }
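// Usage sketch (not part of the original source): the dispatch helpers created
// above drive dynamically scheduled loops roughly as
//
//   __kmpc_dispatch_init_4(&loc, gtid, schedule, lb, ub, st, chunk);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &stride))
//     for (i = lo; i <= hi; i += stride)
//       body(i);
//
// with __kmpc_dispatch_fini_4 emitted where required (e.g. for 'ordered' loops).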
2496 
2497 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
2498  if (CGM.getLangOpts().OpenMPSimd)
2499  return Address::invalid();
2500  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2501  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2502  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
2503  SmallString<64> PtrName;
2504  {
2505  llvm::raw_svector_ostream OS(PtrName);
2506  OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
2507  }
2508  llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2509  if (!Ptr) {
2510  QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2511  Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2512  PtrName);
2513  if (!CGM.getLangOpts().OpenMPIsDevice) {
2514  auto *GV = cast<llvm::GlobalVariable>(Ptr);
2515  GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2516  GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2517  }
2518  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2519  registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2520  }
2521  return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2522  }
2523  return Address::invalid();
2524 }
2525 
2526 llvm::Constant *
2527 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2528  assert(!CGM.getLangOpts().OpenMPUseTLS ||
2529  !CGM.getContext().getTargetInfo().isTLSSupported());
2530  // Lookup the entry, lazily creating it if necessary.
2531  std::string Suffix = getName({"cache", ""});
2532  return getOrCreateInternalVariable(
2533  CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2534 }
2535 
2536 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2537  const VarDecl *VD,
2538  Address VDAddr,
2539  SourceLocation Loc) {
2540  if (CGM.getLangOpts().OpenMPUseTLS &&
2541  CGM.getContext().getTargetInfo().isTLSSupported())
2542  return VDAddr;
2543 
2544  llvm::Type *VarTy = VDAddr.getElementType();
2545  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2546  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2547  CGM.Int8PtrTy),
2548  CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2549  getOrCreateThreadPrivateCache(VD)};
2550  return Address(CGF.EmitRuntimeCall(
2551  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2552  VDAddr.getAlignment());
2553 }
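// Call sketch (not part of the original source): for a threadprivate variable
// 'x' the code above amounts to
//
//   void *p = __kmpc_threadprivate_cached(&loc, gtid, (void *)&x, sizeof(x),
//                                         &<per-variable cache>);
//
// and the returned pointer addresses the calling thread's copy of 'x'.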
2554 
2555 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2556  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2557  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2558  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2559  // library.
2560  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2561  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2562  OMPLoc);
2563  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2564  // to register constructor/destructor for variable.
2565  llvm::Value *Args[] = {
2566  OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2567  Ctor, CopyCtor, Dtor};
2568  CGF.EmitRuntimeCall(
2569  createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2570 }
2571 
2572 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2573  const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2574  bool PerformInit, CodeGenFunction *CGF) {
2575  if (CGM.getLangOpts().OpenMPUseTLS &&
2576  CGM.getContext().getTargetInfo().isTLSSupported())
2577  return nullptr;
2578 
2579  VD = VD->getDefinition(CGM.getContext());
2580  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2581  QualType ASTTy = VD->getType();
2582 
2583  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2584  const Expr *Init = VD->getAnyInitializer();
2585  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2586  // Generate function that re-emits the declaration's initializer into the
2587  // threadprivate copy of the variable VD
2588  CodeGenFunction CtorCGF(CGM);
2589  FunctionArgList Args;
2590  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2591  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2592  ImplicitParamDecl::Other);
2593  Args.push_back(&Dst);
2594 
2595  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2596  CGM.getContext().VoidPtrTy, Args);
2597  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2598  std::string Name = getName({"__kmpc_global_ctor_", ""});
2599  llvm::Function *Fn =
2600  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2601  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2602  Args, Loc, Loc);
2603  llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2604  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2605  CGM.getContext().VoidPtrTy, Dst.getLocation());
2606  Address Arg = Address(ArgVal, VDAddr.getAlignment());
2607  Arg = CtorCGF.Builder.CreateElementBitCast(
2608  Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2609  CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2610  /*IsInitializer=*/true);
2611  ArgVal = CtorCGF.EmitLoadOfScalar(
2612  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2613  CGM.getContext().VoidPtrTy, Dst.getLocation());
2614  CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2615  CtorCGF.FinishFunction();
2616  Ctor = Fn;
2617  }
2618  if (VD->getType().isDestructedType() != QualType::DK_none) {
2619  // Generate function that emits destructor call for the threadprivate copy
2620  // of the variable VD
2621  CodeGenFunction DtorCGF(CGM);
2622  FunctionArgList Args;
2623  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2624  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2625  ImplicitParamDecl::Other);
2626  Args.push_back(&Dst);
2627 
2628  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2629  CGM.getContext().VoidTy, Args);
2630  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2631  std::string Name = getName({"__kmpc_global_dtor_", ""});
2632  llvm::Function *Fn =
2633  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2634  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2635  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2636  Loc, Loc);
2637  // Create a scope with an artificial location for the body of this function.
2638  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2639  llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2640  DtorCGF.GetAddrOfLocalVar(&Dst),
2641  /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2642  DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2643  DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2644  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2645  DtorCGF.FinishFunction();
2646  Dtor = Fn;
2647  }
2648  // Do not emit init function if it is not required.
2649  if (!Ctor && !Dtor)
2650  return nullptr;
2651 
2652  llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2653  auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2654  /*isVarArg=*/false)
2655  ->getPointerTo();
2656  // Copying constructor for the threadprivate variable.
2657  // Must be NULL - the parameter is reserved by the runtime, which currently
2658  // requires it to always be NULL; otherwise it fires an assertion.
2659  CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2660  if (Ctor == nullptr) {
2661  auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2662  /*isVarArg=*/false)
2663  ->getPointerTo();
2664  Ctor = llvm::Constant::getNullValue(CtorTy);
2665  }
2666  if (Dtor == nullptr) {
2667  auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2668  /*isVarArg=*/false)
2669  ->getPointerTo();
2670  Dtor = llvm::Constant::getNullValue(DtorTy);
2671  }
2672  if (!CGF) {
2673  auto *InitFunctionTy =
2674  llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2675  std::string Name = getName({"__omp_threadprivate_init_", ""});
2676  llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2677  InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2678  CodeGenFunction InitCGF(CGM);
2679  FunctionArgList ArgList;
2680  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2681  CGM.getTypes().arrangeNullaryFunction(), ArgList,
2682  Loc, Loc);
2683  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2684  InitCGF.FinishFunction();
2685  return InitFunction;
2686  }
2687  emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2688  }
2689  return nullptr;
2690 }
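// Sketch of the helpers emitted above for "int x = init();" declared
// threadprivate (names abbreviated, not part of the original source):
//
//   void *__kmpc_global_ctor_(void *p) { *(int *)p = init(); return p; }
//   void __omp_threadprivate_init_(void) {
//     __kmpc_global_thread_num(&loc);
//     __kmpc_threadprivate_register(&loc, &x, __kmpc_global_ctor_,
//                                   /*cctor=*/0, /*dtor=*/0);
//   }
//
// A destructor helper is produced only when the variable's type needs
// destruction, and the standalone init function only when no enclosing
// CodeGenFunction is available to emit the registration into directly.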
2691 
2692 /// Obtain information that uniquely identifies a target entry. This
2693 /// consists of the file and device IDs as well as line number associated with
2694 /// the relevant entry source location.
2695 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2696  unsigned &DeviceID, unsigned &FileID,
2697  unsigned &LineNum) {
2698  SourceManager &SM = C.getSourceManager();
2699 
2700  // The loc should always be valid and have a file ID (the user cannot use
2701  // #pragma directives in macros).
2702 
2703  assert(Loc.isValid() && "Source location is expected to be always valid.");
2704 
2705  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2706  assert(PLoc.isValid() && "Source location is expected to be always valid.");
2707 
2708  llvm::sys::fs::UniqueID ID;
2709  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2710  SM.getDiagnostics().Report(diag::err_cannot_open_file)
2711  << PLoc.getFilename() << EC.message();
2712 
2713  DeviceID = ID.getDevice();
2714  FileID = ID.getFile();
2715  LineNum = PLoc.getLine();
2716 }
2717 
2718 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2719  llvm::GlobalVariable *Addr,
2720  bool PerformInit) {
2721  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2722  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2723  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
2724  return CGM.getLangOpts().OpenMPIsDevice;
2725  VD = VD->getDefinition(CGM.getContext());
2726  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2727  return CGM.getLangOpts().OpenMPIsDevice;
2728 
2729  QualType ASTTy = VD->getType();
2730 
2731  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2732  // Produce the unique prefix to identify the new target regions. We use
2733  // the source location of the variable declaration which we know to not
2734  // conflict with any target region.
2735  unsigned DeviceID;
2736  unsigned FileID;
2737  unsigned Line;
2738  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2739  SmallString<128> Buffer, Out;
2740  {
2741  llvm::raw_svector_ostream OS(Buffer);
2742  OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2743  << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2744  }
2745 
2746  const Expr *Init = VD->getAnyInitializer();
2747  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2748  llvm::Constant *Ctor;
2749  llvm::Constant *ID;
2750  if (CGM.getLangOpts().OpenMPIsDevice) {
2751  // Generate function that re-emits the declaration's initializer into
2752  // the threadprivate copy of the variable VD
2753  CodeGenFunction CtorCGF(CGM);
2754 
2755  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
2756  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2757  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2758  FTy, Twine(Buffer, "_ctor"), FI, Loc);
2759  auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2760  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2761  FunctionArgList(), Loc, Loc);
2762  auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2763  CtorCGF.EmitAnyExprToMem(Init,
2764  Address(Addr, CGM.getContext().getDeclAlign(VD)),
2765  Init->getType().getQualifiers(),
2766  /*IsInitializer=*/true);
2767  CtorCGF.FinishFunction();
2768  Ctor = Fn;
2769  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2770  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2771  } else {
2772  Ctor = new llvm::GlobalVariable(
2773  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2774  llvm::GlobalValue::PrivateLinkage,
2775  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2776  ID = Ctor;
2777  }
2778 
2779  // Register the information for the entry associated with the constructor.
2780  Out.clear();
2781  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2782  DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2783  ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2784  }
2785  if (VD->getType().isDestructedType() != QualType::DK_none) {
2786  llvm::Constant *Dtor;
2787  llvm::Constant *ID;
2788  if (CGM.getLangOpts().OpenMPIsDevice) {
2789  // Generate function that emits destructor call for the threadprivate
2790  // copy of the variable VD
2791  CodeGenFunction DtorCGF(CGM);
2792 
2793  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
2794  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2795  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2796  FTy, Twine(Buffer, "_dtor"), FI, Loc);
2797  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2798  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2799  FunctionArgList(), Loc, Loc);
2800  // Create a scope with an artificial location for the body of this
2801  // function.
2802  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2803  DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2804  ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2805  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2806  DtorCGF.FinishFunction();
2807  Dtor = Fn;
2808  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2809  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2810  } else {
2811  Dtor = new llvm::GlobalVariable(
2812  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2813  llvm::GlobalValue::PrivateLinkage,
2814  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2815  ID = Dtor;
2816  }
2817  // Register the information for the entry associated with the destructor.
2818  Out.clear();
2819  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2820  DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2821  ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2822  }
2823  return CGM.getLangOpts().OpenMPIsDevice;
2824 }
2825 
2826 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2827  QualType VarType,
2828  StringRef Name) {
2829  std::string Suffix = getName({"artificial", ""});
2830  std::string CacheSuffix = getName({"cache", ""});
2831  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2832  llvm::Value *GAddr =
2833  getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2834  llvm::Value *Args[] = {
2835  emitUpdateLocation(CGF, SourceLocation()),
2836  getThreadID(CGF, SourceLocation()),
2837  CGF.Builder.CreatePointerCast(GAddr, CGM.VoidPtrTy),
2838  CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2839  /*IsSigned=*/false),
2840  getOrCreateInternalVariable(
2841  CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2842  return Address(
2843  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2844  CGF.EmitRuntimeCall(
2845  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2846  VarLVType->getPointerTo(/*AddrSpace=*/0)),
2847  CGM.getPointerAlign());
2848 }
2849 
2850 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2851  const RegionCodeGenTy &ThenGen,
2852  const RegionCodeGenTy &ElseGen) {
2853  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2854 
2855  // If the condition constant folds and can be elided, try to avoid emitting
2856  // the condition and the dead arm of the if/else.
2857  bool CondConstant;
2858  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2859  if (CondConstant)
2860  ThenGen(CGF);
2861  else
2862  ElseGen(CGF);
2863  return;
2864  }
2865 
2866  // Otherwise, the condition did not fold, or we couldn't elide it. Just
2867  // emit the conditional branch.
2868  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2869  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2870  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2871  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2872 
2873  // Emit the 'then' code.
2874  CGF.EmitBlock(ThenBlock);
2875  ThenGen(CGF);
2876  CGF.EmitBranch(ContBlock);
2877  // Emit the 'else' code if present.
2878  // There is no need to emit line number for unconditional branch.
2879  (void)ApplyDebugLocation::CreateEmpty(CGF);
2880  CGF.EmitBlock(ElseBlock);
2881  ElseGen(CGF);
2882  // There is no need to emit line number for unconditional branch.
2883  (void)ApplyDebugLocation::CreateEmpty(CGF);
2884  CGF.EmitBranch(ContBlock);
2885  // Emit the continuation block for code after the if.
2886  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2887 }
2888 
2889 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2890  llvm::Function *OutlinedFn,
2891  ArrayRef<llvm::Value *> CapturedVars,
2892  const Expr *IfCond) {
2893  if (!CGF.HaveInsertPoint())
2894  return;
2895  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2896  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2897  PrePostActionTy &) {
2898  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2899  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2900  llvm::Value *Args[] = {
2901  RTLoc,
2902  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2903  CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2904  llvm::SmallVector<llvm::Value *, 16> RealArgs;
2905  RealArgs.append(std::begin(Args), std::end(Args));
2906  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2907 
2908  llvm::FunctionCallee RTLFn =
2909  RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2910  CGF.EmitRuntimeCall(RTLFn, RealArgs);
2911  };
2912  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2913  PrePostActionTy &) {
2914  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2915  llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2916  // Build calls:
2917  // __kmpc_serialized_parallel(&Loc, GTid);
2918  llvm::Value *Args[] = {RTLoc, ThreadID};
2919  CGF.EmitRuntimeCall(
2920  RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2921 
2922  // OutlinedFn(&GTid, &zero, CapturedStruct);
2923  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2924  /*Name*/ ".zero.addr");
2925  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2926  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2927  // ThreadId for serialized parallels is 0.
2928  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2929  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2930  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2931  RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2932 
2933  // __kmpc_end_serialized_parallel(&Loc, GTid);
2934  llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2935  CGF.EmitRuntimeCall(
2936  RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2937  EndArgs);
2938  };
2939  if (IfCond) {
2940  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2941  } else {
2942  RegionCodeGenTy ThenRCG(ThenGen);
2943  ThenRCG(CGF);
2944  }
2945 }
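// Taken together, the two lambdas above lower '#pragma omp parallel if(cond)'
// roughly as follows. This is an illustrative sketch only; the __kmpc_*
// prototypes shown are the usual libomp entry points assumed here, not
// declarations from this file.
// \code
//   // void __kmpc_fork_call(ident_t *, kmp_int32 argc, kmpc_micro fn, ...);
//   // void __kmpc_serialized_parallel(ident_t *, kmp_int32 gtid);
//   // void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 gtid);
//   if (cond) {
//     __kmpc_fork_call(&loc, /*argc=*/N, (kmpc_micro)outlined_fn,
//                      captured_var_1, ..., captured_var_N);
//   } else {
//     kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//     __kmpc_serialized_parallel(&loc, gtid);
//     kmp_int32 zero = 0;  // thread id passed to the outlined body is 0
//     outlined_fn(&zero, &zero, captured_var_1, ..., captured_var_N);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }
// \endcode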
2946 
 2947 // If we're inside an (outlined) parallel region, use the region info's
 2948 // thread-ID variable (it is passed as the first argument of the outlined
 2949 // function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
 2950 // region but in regular serial code, get the thread ID by calling kmp_int32
 2951 // __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary
 2952 // and return the address of that temporary.
2954  SourceLocation Loc) {
2955  if (auto *OMPRegionInfo =
2956  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2957  if (OMPRegionInfo->getThreadIDVariable())
2958  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2959 
2960  llvm::Value *ThreadID = getThreadID(CGF, Loc);
2961  QualType Int32Ty =
2962  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2963  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2964  CGF.EmitStoreOfScalar(ThreadID,
2965  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2966 
2967  return ThreadIDTemp;
2968 }
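// Outside of an outlined region the sequence built above is simply
// (illustrative only):
// \code
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc); // via getThreadID(),
//                                                    // which may reuse a
//                                                    // cached value
//   kmp_int32 threadid_temp = gtid;                  // ".threadid_temp." alloca
//   // Callers then use &threadid_temp as the thread-ID address.
// \endcode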
2969 
2971  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2972  SmallString<256> Buffer;
2973  llvm::raw_svector_ostream Out(Buffer);
2974  Out << Name;
2975  StringRef RuntimeName = Out.str();
2976  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2977  if (Elem.second) {
2978  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2979  "OMP internal variable has different type than requested");
2980  return &*Elem.second;
2981  }
2982 
2983  return Elem.second = new llvm::GlobalVariable(
2984  CGM.getModule(), Ty, /*IsConstant*/ false,
2985  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2986  Elem.first(), /*InsertBefore=*/nullptr,
2987  llvm::GlobalValue::NotThreadLocal, AddressSpace);
2988 }
2989 
2991  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2992  std::string Name = getName({Prefix, "var"});
2993  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2994 }
2995 
2996 namespace {
2997 /// Common pre(post)-action for different OpenMP constructs.
2998 class CommonActionTy final : public PrePostActionTy {
2999  llvm::FunctionCallee EnterCallee;
3000  ArrayRef<llvm::Value *> EnterArgs;
3001  llvm::FunctionCallee ExitCallee;
3002  ArrayRef<llvm::Value *> ExitArgs;
3003  bool Conditional;
3004  llvm::BasicBlock *ContBlock = nullptr;
3005 
3006 public:
3007  CommonActionTy(llvm::FunctionCallee EnterCallee,
3008  ArrayRef<llvm::Value *> EnterArgs,
3009  llvm::FunctionCallee ExitCallee,
3010  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3011  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3012  ExitArgs(ExitArgs), Conditional(Conditional) {}
3013  void Enter(CodeGenFunction &CGF) override {
3014  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3015  if (Conditional) {
3016  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3017  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3018  ContBlock = CGF.createBasicBlock("omp_if.end");
3019  // Generate the branch (If-stmt)
3020  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3021  CGF.EmitBlock(ThenBlock);
3022  }
3023  }
3024  void Done(CodeGenFunction &CGF) {
3025  // Emit the rest of blocks/branches
3026  CGF.EmitBranch(ContBlock);
3027  CGF.EmitBlock(ContBlock, true);
3028  }
3029  void Exit(CodeGenFunction &CGF) override {
3030  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3031  }
3032 };
3033 } // anonymous namespace
3034 
3036  StringRef CriticalName,
3037  const RegionCodeGenTy &CriticalOpGen,
3038  SourceLocation Loc, const Expr *Hint) {
3039  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3040  // CriticalOpGen();
3041  // __kmpc_end_critical(ident_t *, gtid, Lock);
3042  // Prepare arguments and build a call to __kmpc_critical
3043  if (!CGF.HaveInsertPoint())
3044  return;
3045  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3046  getCriticalRegionLock(CriticalName)};
3047  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3048  std::end(Args));
3049  if (Hint) {
3050  EnterArgs.push_back(CGF.Builder.CreateIntCast(
3051  CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3052  }
3053  CommonActionTy Action(
3057  CriticalOpGen.setAction(Action);
3058  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3059 }
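// The net effect for '#pragma omp critical (name) [hint(h)]' is roughly the
// sketch below. kmp_critical_name is the runtime's lock type; the lock
// variable name is a placeholder for the internal variable created by
// getCriticalRegionLock, and the _with_hint variant is used only when a
// 'hint' clause is present.
// \code
//   static kmp_critical_name gomp_critical_user_name_var;   // internal lock
//   __kmpc_critical(&loc, gtid, &gomp_critical_user_name_var);
//   //   ...or: __kmpc_critical_with_hint(&loc, gtid,
//   //                                    &gomp_critical_user_name_var, h);
//   /* CriticalOpGen() */
//   __kmpc_end_critical(&loc, gtid, &gomp_critical_user_name_var);
// \endcode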
3060 
3062  const RegionCodeGenTy &MasterOpGen,
3063  SourceLocation Loc) {
3064  if (!CGF.HaveInsertPoint())
3065  return;
3066  // if(__kmpc_master(ident_t *, gtid)) {
3067  // MasterOpGen();
3068  // __kmpc_end_master(ident_t *, gtid);
3069  // }
3070  // Prepare arguments and build a call to __kmpc_master
3071  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3072  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3074  /*Conditional=*/true);
3075  MasterOpGen.setAction(Action);
3076  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3077  Action.Done(CGF);
3078 }
3079 
3081  SourceLocation Loc) {
3082  if (!CGF.HaveInsertPoint())
3083  return;
3084  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3085  llvm::Value *Args[] = {
3086  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3087  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3089  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3090  Region->emitUntiedSwitch(CGF);
3091 }
3092 
3094  const RegionCodeGenTy &TaskgroupOpGen,
3095  SourceLocation Loc) {
3096  if (!CGF.HaveInsertPoint())
3097  return;
3098  // __kmpc_taskgroup(ident_t *, gtid);
3099  // TaskgroupOpGen();
3100  // __kmpc_end_taskgroup(ident_t *, gtid);
3101  // Prepare arguments and build a call to __kmpc_taskgroup
3102  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3103  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3105  Args);
3106  TaskgroupOpGen.setAction(Action);
3107  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3108 }
3109 
3110 /// Given an array of pointers to variables, project the address of a
3111 /// given variable.
3113  unsigned Index, const VarDecl *Var) {
3114  // Pull out the pointer to the variable.
3115  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3116  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3117 
3118  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3119  Addr = CGF.Builder.CreateElementBitCast(
3120  Addr, CGF.ConvertTypeForMem(Var->getType()));
3121  return Addr;
3122 }
3123 
3125  CodeGenModule &CGM, llvm::Type *ArgsType,
3126  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3127  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3128  SourceLocation Loc) {
3129  ASTContext &C = CGM.getContext();
3130  // void copy_func(void *LHSArg, void *RHSArg);
3131  FunctionArgList Args;
3132  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3134  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3136  Args.push_back(&LHSArg);
3137  Args.push_back(&RHSArg);
3138  const auto &CGFI =
3140  std::string Name =
3141  CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3142  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3144  &CGM.getModule());
3145  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3146  Fn->setDoesNotRecurse();
3147  CodeGenFunction CGF(CGM);
3148  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3149  // Dest = (void*[n])(LHSArg);
3150  // Src = (void*[n])(RHSArg);
3152  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3153  ArgsType), CGF.getPointerAlign());
3155  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3156  ArgsType), CGF.getPointerAlign());
3157  // *(Type0*)Dst[0] = *(Type0*)Src[0];
3158  // *(Type1*)Dst[1] = *(Type1*)Src[1];
3159  // ...
3160  // *(Typen*)Dst[n] = *(Typen*)Src[n];
3161  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3162  const auto *DestVar =
3163  cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3164  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3165 
3166  const auto *SrcVar =
3167  cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3168  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3169 
3170  const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3171  QualType Type = VD->getType();
3172  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3173  }
3174  CGF.FinishFunction();
3175  return Fn;
3176 }
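// In spirit, the generated helper is the following sketch (the function and
// type names here are made up for illustration):
// \code
//   static void omp_copyprivate_copy_func(void *LHSArg, void *RHSArg) {
//     void **Dst = reinterpret_cast<void **>(LHSArg); // destination pointers
//     void **Src = reinterpret_cast<void **>(RHSArg); // source pointers
//     *(T0 *)Dst[0] = *(T0 *)Src[0];  // AssignmentOps[0]
//     *(T1 *)Dst[1] = *(T1 *)Src[1];  // AssignmentOps[1]
//     // ... one copy (or user-defined copy assignment) per copyprivate var.
//   }
// \endcode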
3177 
3179  const RegionCodeGenTy &SingleOpGen,
3180  SourceLocation Loc,
3181  ArrayRef<const Expr *> CopyprivateVars,
3182  ArrayRef<const Expr *> SrcExprs,
3183  ArrayRef<const Expr *> DstExprs,
3184  ArrayRef<const Expr *> AssignmentOps) {
3185  if (!CGF.HaveInsertPoint())
3186  return;
3187  assert(CopyprivateVars.size() == SrcExprs.size() &&
3188  CopyprivateVars.size() == DstExprs.size() &&
3189  CopyprivateVars.size() == AssignmentOps.size());
3190  ASTContext &C = CGM.getContext();
3191  // int32 did_it = 0;
3192  // if(__kmpc_single(ident_t *, gtid)) {
3193  // SingleOpGen();
3194  // __kmpc_end_single(ident_t *, gtid);
3195  // did_it = 1;
3196  // }
3197  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3198  // <copy_func>, did_it);
3199 
3200  Address DidIt = Address::invalid();
3201  if (!CopyprivateVars.empty()) {
3202  // int32 did_it = 0;
3203  QualType KmpInt32Ty =
3204  C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3205  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3206  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3207  }
3208  // Prepare arguments and build a call to __kmpc_single
3209  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3210  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3212  /*Conditional=*/true);
3213  SingleOpGen.setAction(Action);
3214  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3215  if (DidIt.isValid()) {
3216  // did_it = 1;
3217  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3218  }
3219  Action.Done(CGF);
3220  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3221  // <copy_func>, did_it);
3222  if (DidIt.isValid()) {
3223  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3224  QualType CopyprivateArrayTy =
3225  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3226  /*IndexTypeQuals=*/0);
3227  // Create a list of all private variables for copyprivate.
3228  Address CopyprivateList =
3229  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3230  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3231  Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3232  CGF.Builder.CreateStore(
3234  CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3235  Elem);
3236  }
 3237  // Build the function that copies private values from the single region to
 3238  // all other threads in the corresponding parallel region.
3240  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3241  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3242  llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3243  Address CL =
3244  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3245  CGF.VoidPtrTy);
3246  llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3247  llvm::Value *Args[] = {
3248  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3249  getThreadID(CGF, Loc), // i32 <gtid>
3250  BufSize, // size_t <buf_size>
3251  CL.getPointer(), // void *<copyprivate list>
3252  CpyFn, // void (*) (void *, void *) <copy_func>
3253  DidItVal // i32 did_it
3254  };
3256  }
3257 }
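// Putting it together, '#pragma omp single copyprivate(a, b)' becomes roughly
// the sketch below. The __kmpc_copyprivate prototype is the usual libomp one
// and is assumed here; 'copy_func' stands for the helper emitted above.
// \code
//   // void __kmpc_copyprivate(ident_t *, kmp_int32 gtid, size_t cpy_size,
//   //                         void *cpy_data, void (*cpy_func)(void *, void *),
//   //                         kmp_int32 didit);
//   kmp_int32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     /* SingleOpGen() */
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   void *cpr_list[2] = {&a, &b};
//   __kmpc_copyprivate(&loc, gtid, sizeof(cpr_list), cpr_list, copy_func,
//                      did_it);
// \endcode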
3258 
3260  const RegionCodeGenTy &OrderedOpGen,
3261  SourceLocation Loc, bool IsThreads) {
3262  if (!CGF.HaveInsertPoint())
3263  return;
3264  // __kmpc_ordered(ident_t *, gtid);
3265  // OrderedOpGen();
3266  // __kmpc_end_ordered(ident_t *, gtid);
3267  // Prepare arguments and build a call to __kmpc_ordered
3268  if (IsThreads) {
3269  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3270  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3272  Args);
3273  OrderedOpGen.setAction(Action);
3274  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3275  return;
3276  }
3277  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3278 }
3279 
3281  unsigned Flags;
3282  if (Kind == OMPD_for)
3283  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3284  else if (Kind == OMPD_sections)
3285  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3286  else if (Kind == OMPD_single)
3287  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3288  else if (Kind == OMPD_barrier)
3289  Flags = OMP_IDENT_BARRIER_EXPL;
3290  else
3291  Flags = OMP_IDENT_BARRIER_IMPL;
3292  return Flags;
3293 }
3294 
3296  CodeGenFunction &CGF, const OMPLoopDirective &S,
3297  OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
 3298  // Check if the loop directive is actually a doacross loop directive. In this
 3299  // case choose a static schedule with chunk size 1.
3300  if (llvm::any_of(
3302  [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3303  ScheduleKind = OMPC_SCHEDULE_static;
3304  // Chunk size is 1 in this case.
3305  llvm::APInt ChunkSize(32, 1);
3306  ChunkExpr = IntegerLiteral::Create(
3307  CGF.getContext(), ChunkSize,
3308  CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3309  SourceLocation());
3310  }
3311 }
3312 
3314  OpenMPDirectiveKind Kind, bool EmitChecks,
3315  bool ForceSimpleCall) {
3316  if (!CGF.HaveInsertPoint())
3317  return;
3318  // Build call __kmpc_cancel_barrier(loc, thread_id);
3319  // Build call __kmpc_barrier(loc, thread_id);
3320  unsigned Flags = getDefaultFlagsForBarriers(Kind);
3321  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3322  // thread_id);
3323  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3324  getThreadID(CGF, Loc)};
3325  if (auto *OMPRegionInfo =
3326  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3327  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3328  llvm::Value *Result = CGF.EmitRuntimeCall(
3330  if (EmitChecks) {
3331  // if (__kmpc_cancel_barrier()) {
3332  // exit from construct;
3333  // }
3334  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3335  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3336  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3337  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3338  CGF.EmitBlock(ExitBB);
3339  // exit from construct;
3340  CodeGenFunction::JumpDest CancelDestination =
3341  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3342  CGF.EmitBranchThroughCleanup(CancelDestination);
3343  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3344  }
3345  return;
3346  }
3347  }
 3348  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
 3349 }
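// In a region that can be cancelled the barrier above therefore expands to
// roughly (sketch only):
// \code
//   if (__kmpc_cancel_barrier(&loc, gtid)) {
//     // ".cancel.exit": leave the construct through its cleanup/cancel path.
//     goto cancel_exit;
//   }
//   // ".cancel.continue": normal fall-through.
// \endcode
// Otherwise a plain __kmpc_barrier(&loc, gtid) call is emitted.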
3350 
3351 /// Map the OpenMP loop schedule to the runtime enumeration.
3353  bool Chunked, bool Ordered) {
3354  switch (ScheduleKind) {
3355  case OMPC_SCHEDULE_static:
3356  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3357  : (Ordered ? OMP_ord_static : OMP_sch_static);
3358  case OMPC_SCHEDULE_dynamic:
3360  case OMPC_SCHEDULE_guided:
3362  case OMPC_SCHEDULE_runtime:
3363  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3364  case OMPC_SCHEDULE_auto:
3365  return Ordered ? OMP_ord_auto : OMP_sch_auto;
3366  case OMPC_SCHEDULE_unknown:
3367  assert(!Chunked && "chunk was specified but schedule kind not known");
3368  return Ordered ? OMP_ord_static : OMP_sch_static;
3369  }
3370  llvm_unreachable("Unexpected runtime schedule");
3371 }
3372 
3373 /// Map the OpenMP distribute schedule to the runtime enumeration.
3374 static OpenMPSchedType
3376  // only static is allowed for dist_schedule
3378 }
3379 
3381  bool Chunked) const {
3382  OpenMPSchedType Schedule =
3383  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3384  return Schedule == OMP_sch_static;
3385 }
3386 
3388  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3389  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3390  return Schedule == OMP_dist_sch_static;
3391 }
3392 
3394  bool Chunked) const {
3395  OpenMPSchedType Schedule =
3396  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3397  return Schedule == OMP_sch_static_chunked;
3398 }
3399 
3401  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3402  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3403  return Schedule == OMP_dist_sch_static_chunked;
3404 }
3405 
3407  OpenMPSchedType Schedule =
3408  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3409  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3410  return Schedule != OMP_sch_static;
3411 }
3412 
3416  int Modifier = 0;
3417  switch (M1) {
3418  case OMPC_SCHEDULE_MODIFIER_monotonic:
3419  Modifier = OMP_sch_modifier_monotonic;
3420  break;
3421  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3422  Modifier = OMP_sch_modifier_nonmonotonic;
3423  break;
3424  case OMPC_SCHEDULE_MODIFIER_simd:
3425  if (Schedule == OMP_sch_static_chunked)
3427  break;
3430  break;
3431  }
3432  switch (M2) {
3433  case OMPC_SCHEDULE_MODIFIER_monotonic:
3434  Modifier = OMP_sch_modifier_monotonic;
3435  break;
3436  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3437  Modifier = OMP_sch_modifier_nonmonotonic;
3438  break;
3439  case OMPC_SCHEDULE_MODIFIER_simd:
3440  if (Schedule == OMP_sch_static_chunked)
3442  break;
3445  break;
3446  }
3447  return Schedule | Modifier;
3448 }
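// The value handed to the runtime is therefore a plain bit-or of the base
// schedule enumerator and a modifier bit, e.g. for
// 'schedule(nonmonotonic: dynamic)' (symbolic sketch; the exact enumerator
// values live in this file's OpenMPSchedType and modifier enums):
// \code
//   int Sched = OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic;
// \endcode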
3449 
3451  CodeGenFunction &CGF, SourceLocation Loc,
3452  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3453  bool Ordered, const DispatchRTInput &DispatchValues) {
3454  if (!CGF.HaveInsertPoint())
3455  return;
3457  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3458  assert(Ordered ||
3459  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3460  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3461  Schedule != OMP_sch_static_balanced_chunked));
3462  // Call __kmpc_dispatch_init(
3463  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3464  // kmp_int[32|64] lower, kmp_int[32|64] upper,
3465  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
3466 
3467  // If the Chunk was not specified in the clause - use default value 1.
3468  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3469  : CGF.Builder.getIntN(IVSize, 1);
3470  llvm::Value *Args[] = {
3471  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3472  CGF.Builder.getInt32(addMonoNonMonoModifier(
3473  Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3474  DispatchValues.LB, // Lower
3475  DispatchValues.UB, // Upper
3476  CGF.Builder.getIntN(IVSize, 1), // Stride
3477  Chunk // Chunk
3478  };
3479  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3480 }
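// For a 32-bit signed induction variable the call built above amounts to the
// following sketch (the libomp prototype shown is assumed, not declared here):
// \code
//   // void __kmpc_dispatch_init_4(ident_t *, kmp_int32 gtid, kmp_int32 sched,
//   //                             kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
//   //                             kmp_int32 chunk);
//   __kmpc_dispatch_init_4(&loc, gtid, sched_with_modifiers, lb, ub,
//                          /*stride=*/1, chunk_or_1);
// \endcode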
3481 
3483  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3484  llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3486  const CGOpenMPRuntime::StaticRTInput &Values) {
3487  if (!CGF.HaveInsertPoint())
3488  return;
3489 
3490  assert(!Values.Ordered);
3491  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3492  Schedule == OMP_sch_static_balanced_chunked ||
3493  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3494  Schedule == OMP_dist_sch_static ||
3495  Schedule == OMP_dist_sch_static_chunked);
3496 
3497  // Call __kmpc_for_static_init(
3498  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3499  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3500  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3501  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3502  llvm::Value *Chunk = Values.Chunk;
3503  if (Chunk == nullptr) {
3504  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3505  Schedule == OMP_dist_sch_static) &&
3506  "expected static non-chunked schedule");
3507  // If the Chunk was not specified in the clause - use default value 1.
3508  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3509  } else {
3510  assert((Schedule == OMP_sch_static_chunked ||
3511  Schedule == OMP_sch_static_balanced_chunked ||
3512  Schedule == OMP_ord_static_chunked ||
3513  Schedule == OMP_dist_sch_static_chunked) &&
3514  "expected static chunked schedule");
3515  }
3516  llvm::Value *Args[] = {
3517  UpdateLocation,
3518  ThreadId,
3519  CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3520  M2)), // Schedule type
3521  Values.IL.getPointer(), // &isLastIter
3522  Values.LB.getPointer(), // &LB
3523  Values.UB.getPointer(), // &UB
3524  Values.ST.getPointer(), // &Stride
3525  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3526  Chunk // Chunk
3527  };
3528  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3529 }
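// For a statically scheduled worksharing loop the full sequence ends up
// looking roughly like this (sketch; 32-bit signed IV, libomp prototype
// assumed, loop bounds simplified):
// \code
//   // void __kmpc_for_static_init_4(ident_t *, kmp_int32 gtid, kmp_int32 sched,
//   //                               kmp_int32 *plastiter, kmp_int32 *plower,
//   //                               kmp_int32 *pupper, kmp_int32 *pstride,
//   //                               kmp_int32 incr, kmp_int32 chunk);
//   kmp_int32 last = 0, lb = 0, ub = n - 1, st = 1;
//   __kmpc_for_static_init_4(&loc, gtid, OMP_sch_static, &last, &lb, &ub, &st,
//                            /*incr=*/1, /*chunk=*/1);
//   for (kmp_int32 i = lb; i <= ub; ++i)
//     body(i);
//   __kmpc_for_static_fini(&loc, gtid);
// \endcode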
3530 
3532  SourceLocation Loc,
3533  OpenMPDirectiveKind DKind,
3534  const OpenMPScheduleTy &ScheduleKind,
3535  const StaticRTInput &Values) {
3536  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3537  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3538  assert(isOpenMPWorksharingDirective(DKind) &&
3539  "Expected loop-based or sections-based directive.");
3540  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3541  isOpenMPLoopDirective(DKind)
3542  ? OMP_IDENT_WORK_LOOP
3543  : OMP_IDENT_WORK_SECTIONS);
3544  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3545  llvm::FunctionCallee StaticInitFunction =
3547  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3548  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3549 }
3550 
3552  CodeGenFunction &CGF, SourceLocation Loc,
3553  OpenMPDistScheduleClauseKind SchedKind,
3554  const CGOpenMPRuntime::StaticRTInput &Values) {
3555  OpenMPSchedType ScheduleNum =
3556  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3557  llvm::Value *UpdatedLocation =
3558  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3559  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3560  llvm::FunctionCallee StaticInitFunction =
3561  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3562  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3563  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3565 }
3566 
3568  SourceLocation Loc,
3569  OpenMPDirectiveKind DKind) {
3570  if (!CGF.HaveInsertPoint())
3571  return;
3572  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3573  llvm::Value *Args[] = {
3574  emitUpdateLocation(CGF, Loc,
3576  ? OMP_IDENT_WORK_DISTRIBUTE
3577  : isOpenMPLoopDirective(DKind)
3578  ? OMP_IDENT_WORK_LOOP
3579  : OMP_IDENT_WORK_SECTIONS),
3580  getThreadID(CGF, Loc)};
3582  Args);
3583 }
3584 
3586  SourceLocation Loc,
3587  unsigned IVSize,
3588  bool IVSigned) {
3589  if (!CGF.HaveInsertPoint())
3590  return;
3591  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3592  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3593  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3594 }
3595 
3597  SourceLocation Loc, unsigned IVSize,
3598  bool IVSigned, Address IL,
3599  Address LB, Address UB,
3600  Address ST) {
3601  // Call __kmpc_dispatch_next(
3602  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3603  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3604  // kmp_int[32|64] *p_stride);
3605  llvm::Value *Args[] = {
3606  emitUpdateLocation(CGF, Loc),
3607  getThreadID(CGF, Loc),
3608  IL.getPointer(), // &isLastIter
3609  LB.getPointer(), // &Lower
3610  UB.getPointer(), // &Upper
3611  ST.getPointer() // &Stride
3612  };
3613  llvm::Value *Call =
3614  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3615  return CGF.EmitScalarConversion(
3616  Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3617  CGF.getContext().BoolTy, Loc);
3618 }
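// A dynamically scheduled loop then drains chunks with the usual driver loop
// (sketch; 32-bit signed IV, libomp prototype assumed):
// \code
//   // int __kmpc_dispatch_next_4(ident_t *, kmp_int32 gtid, kmp_int32 *plast,
//   //                            kmp_int32 *plb, kmp_int32 *pub, kmp_int32 *pst);
//   kmp_int32 last, lb, ub, st;
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lb, &ub, &st)) {
//     for (kmp_int32 i = lb; i <= ub; ++i)
//       body(i);
//   }
// \endcode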
3619 
3621  llvm::Value *NumThreads,
3622  SourceLocation Loc) {
3623  if (!CGF.HaveInsertPoint())
3624  return;
3625  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3626  llvm::Value *Args[] = {
3627  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3628  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3630  Args);
3631 }
3632 
3634  OpenMPProcBindClauseKind ProcBind,
3635  SourceLocation Loc) {
3636  if (!CGF.HaveInsertPoint())
3637  return;
 3638  // Constants for the proc_bind values accepted by the runtime.
3639  enum ProcBindTy {
3640  ProcBindFalse = 0,
3641  ProcBindTrue,
3642  ProcBindMaster,
3643  ProcBindClose,
3644  ProcBindSpread,
3645  ProcBindIntel,
3646  ProcBindDefault
3647  } RuntimeProcBind;
3648  switch (ProcBind) {
3649  case OMPC_PROC_BIND_master:
3650  RuntimeProcBind = ProcBindMaster;
3651  break;
3652  case OMPC_PROC_BIND_close:
3653  RuntimeProcBind = ProcBindClose;
3654  break;
3655  case OMPC_PROC_BIND_spread:
3656  RuntimeProcBind = ProcBindSpread;
3657  break;
3659  llvm_unreachable("Unsupported proc_bind value.");
3660  }
3661  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3662  llvm::Value *Args[] = {
3663  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3664  llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3666 }
3667 
3668 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3669  SourceLocation Loc) {
3670  if (!CGF.HaveInsertPoint())
3671  return;
3672  // Build call void __kmpc_flush(ident_t *loc)
3674  emitUpdateLocation(CGF, Loc));
3675 }
3676 
3677 namespace {
3678 /// Indexes of fields for type kmp_task_t.
3680  /// List of shared variables.
3681  KmpTaskTShareds,
3682  /// Task routine.
3683  KmpTaskTRoutine,
3684  /// Partition id for the untied tasks.
3685  KmpTaskTPartId,
3686  /// Function with call of destructors for private variables.
3687  Data1,
3688  /// Task priority.
3689  Data2,
3690  /// (Taskloops only) Lower bound.
3691  KmpTaskTLowerBound,
3692  /// (Taskloops only) Upper bound.
3693  KmpTaskTUpperBound,
3694  /// (Taskloops only) Stride.
3695  KmpTaskTStride,
3696  /// (Taskloops only) Is last iteration flag.
3697  KmpTaskTLastIter,
3698  /// (Taskloops only) Reduction data.
3699  KmpTaskTReductions,
3700 };
3701 } // anonymous namespace
3702 
3703 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3704  return OffloadEntriesTargetRegion.empty() &&
3705  OffloadEntriesDeviceGlobalVar.empty();
3706 }
3707 
3708 /// Initialize target region entry.
3709 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3710  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3711  StringRef ParentName, unsigned LineNum,
3712  unsigned Order) {
3713  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3714  "only required for the device "
3715  "code generation.");
3716  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3717  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3718  OMPTargetRegionEntryTargetRegion);
3719  ++OffloadingEntriesNum;
3720 }
3721 
3722 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3723  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3724  StringRef ParentName, unsigned LineNum,
3725  llvm::Constant *Addr, llvm::Constant *ID,
3726  OMPTargetRegionEntryKind Flags) {
 3727  // If we are emitting code for a target, the entry is already initialized;
 3728  // it only has to be registered.
3729  if (CGM.getLangOpts().OpenMPIsDevice) {
3730  if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3731  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3733  "Unable to find target region on line '%0' in the device code.");
3734  CGM.getDiags().Report(DiagID) << LineNum;
3735  return;
3736  }
3737  auto &Entry =
3738  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3739  assert(Entry.isValid() && "Entry not initialized!");
3740  Entry.setAddress(Addr);
3741  Entry.setID(ID);
3742  Entry.setFlags(Flags);
3743  } else {
3744  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3745  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3746  ++OffloadingEntriesNum;
3747  }
3748 }
3749 
3750 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3751  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3752  unsigned LineNum) const {
3753  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3754  if (PerDevice == OffloadEntriesTargetRegion.end())
3755  return false;
3756  auto PerFile = PerDevice->second.find(FileID);
3757  if (PerFile == PerDevice->second.end())
3758  return false;
3759  auto PerParentName = PerFile->second.find(ParentName);
3760  if (PerParentName == PerFile->second.end())
3761  return false;
3762  auto PerLine = PerParentName->second.find(LineNum);
3763  if (PerLine == PerParentName->second.end())
3764  return false;
3765  // Fail if this entry is already registered.
3766  if (PerLine->second.getAddress() || PerLine->second.getID())
3767  return false;
3768  return true;
3769 }
3770 
3771 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3772  const OffloadTargetRegionEntryInfoActTy &Action) {
3773  // Scan all target region entries and perform the provided action.
3774  for (const auto &D : OffloadEntriesTargetRegion)
3775  for (const auto &F : D.second)
3776  for (const auto &P : F.second)
3777  for (const auto &L : P.second)
3778  Action(D.first, F.first, P.first(), L.first, L.second);
3779 }
3780 
3781 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3782  initializeDeviceGlobalVarEntryInfo(StringRef Name,
3783  OMPTargetGlobalVarEntryKind Flags,
3784  unsigned Order) {
3785  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3786  "only required for the device "
3787  "code generation.");
3788  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3789  ++OffloadingEntriesNum;
3790 }
3791 
3792 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3793  registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3794  CharUnits VarSize,
3795  OMPTargetGlobalVarEntryKind Flags,
3796  llvm::GlobalValue::LinkageTypes Linkage) {
3797  if (CGM.getLangOpts().OpenMPIsDevice) {
3798  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3799  assert(Entry.isValid() && Entry.getFlags() == Flags &&
3800  "Entry not initialized!");
3801  assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3802  "Resetting with the new address.");
3803  if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3804  if (Entry.getVarSize().isZero()) {
3805  Entry.setVarSize(VarSize);
3806  Entry.setLinkage(Linkage);
3807  }
3808  return;
3809  }
3810  Entry.setVarSize(VarSize);
3811  Entry.setLinkage(Linkage);
3812  Entry.setAddress(Addr);
3813  } else {
3814  if (hasDeviceGlobalVarEntryInfo(VarName)) {
3815  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3816  assert(Entry.isValid() && Entry.getFlags() == Flags &&
3817  "Entry not initialized!");
3818  assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3819  "Resetting with the new address.");
3820  if (Entry.getVarSize().isZero()) {
3821  Entry.setVarSize(VarSize);
3822  Entry.setLinkage(Linkage);
3823  }
3824  return;
3825  }
3826  OffloadEntriesDeviceGlobalVar.try_emplace(
3827  VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3828  ++OffloadingEntriesNum;
3829  }
3830 }
3831 
3832 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3833  actOnDeviceGlobalVarEntriesInfo(
3834  const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
 3835  // Scan all device global variable entries and perform the provided action.
3836  for (const auto &E : OffloadEntriesDeviceGlobalVar)
3837  Action(E.getKey(), E.getValue());
3838 }
3839 
3840 llvm::Function *
3842  // If we don't have entries or if we are emitting code for the device, we
3843  // don't need to do anything.
3844  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3845  return nullptr;
3846 
3847  llvm::Module &M = CGM.getModule();
3848  ASTContext &C = CGM.getContext();
3849 
3850  // Get list of devices we care about
3851  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3852 
3853  // We should be creating an offloading descriptor only if there are devices
3854  // specified.
3855  assert(!Devices.empty() && "No OpenMP offloading devices??");
3856 
3857  // Create the external variables that will point to the begin and end of the
3858  // host entries section. These will be defined by the linker.
3859  llvm::Type *OffloadEntryTy =
3861  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3862  auto *HostEntriesBegin = new llvm::GlobalVariable(
3863  M, OffloadEntryTy, /*isConstant=*/true,
3864  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3865  EntriesBeginName);
3866  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3867  auto *HostEntriesEnd =
3868  new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3870  /*Initializer=*/nullptr, EntriesEndName);
3871 
3872  // Create all device images
3873  auto *DeviceImageTy = cast<llvm::StructType>(
3875  ConstantInitBuilder DeviceImagesBuilder(CGM);
3876  ConstantArrayBuilder DeviceImagesEntries =
3877  DeviceImagesBuilder.beginArray(DeviceImageTy);
3878 
3879  for (const llvm::Triple &Device : Devices) {
3880  StringRef T = Device.getTriple();
3881  std::string BeginName = getName({"omp_offloading", "img_start", ""});
3882  auto *ImgBegin = new llvm::GlobalVariable(
3883  M, CGM.Int8Ty, /*isConstant=*/true,
3884  llvm::GlobalValue::ExternalWeakLinkage,
3885  /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3886  std::string EndName = getName({"omp_offloading", "img_end", ""});
3887  auto *ImgEnd = new llvm::GlobalVariable(
3888  M, CGM.Int8Ty, /*isConstant=*/true,
3889  llvm::GlobalValue::ExternalWeakLinkage,
3890  /*Initializer=*/nullptr, Twine(EndName).concat(T));
3891 
3892  llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3893  HostEntriesEnd};
3895  DeviceImagesEntries);
3896  }
3897 
3898  // Create device images global array.
3899  std::string ImagesName = getName({"omp_offloading", "device_images"});
3900  llvm::GlobalVariable *DeviceImages =
3901  DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3902  CGM.getPointerAlign(),
3903  /*isConstant=*/true);
3904  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3905 
 3906  // A zero-index array used to build the constant GEP expressions below.
3907  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3908  llvm::Constant::getNullValue(CGM.Int32Ty)};
3909 
3910  // Create the target region descriptor.
3911  llvm::Constant *Data[] = {
3912  llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3913  llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3914  DeviceImages, Index),
3915  HostEntriesBegin, HostEntriesEnd};
3916  std::string Descriptor = getName({"omp_offloading", "descriptor"});
3917  llvm::GlobalVariable *Desc = createGlobalStruct(
3918  CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
3919 
3920  // Emit code to register or unregister the descriptor at execution
3921  // startup or closing, respectively.
3922 
3923  llvm::Function *UnRegFn;
3924  {
3925  FunctionArgList Args;
3927  Args.push_back(&DummyPtr);
3928 
3929  CodeGenFunction CGF(CGM);
 3930  // Disable debug info for global (de-)initializers because they are not part
 3931  // of some particular construct.
3932  CGF.disableDebugInfo();
3933  const auto &FI =
3935  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3936  std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
3937  UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
3938  CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
3940  Desc);
3941  CGF.FinishFunction();
3942  }
3943  llvm::Function *RegFn;
3944  {
3945  CodeGenFunction CGF(CGM);
 3946  // Disable debug info for global (de-)initializers because they are not part
 3947  // of some particular construct.
3948  CGF.disableDebugInfo();
3949  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
3950  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3951 
3952  // Encode offload target triples into the registration function name. It
3953  // will serve as a comdat key for the registration/unregistration code for
3954  // this particular combination of offloading targets.
3955  SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
3956  RegFnNameParts[0] = "omp_offloading";
3957  RegFnNameParts[1] = "descriptor_reg";
3958  llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
3959  [](const llvm::Triple &T) -> const std::string& {
3960  return T.getTriple();
3961  });
3962  llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
3963  std::string Descriptor = getName(RegFnNameParts);
3964  RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
3965  CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
3967  // Create a variable to drive the registration and unregistration of the
3968  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3969  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
3970  SourceLocation(), nullptr, C.CharTy,
3972  CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3973  CGF.FinishFunction();
3974  }
3975  if (CGM.supportsCOMDAT()) {
3976  // It is sufficient to call registration function only once, so create a
3977  // COMDAT group for registration/unregistration functions and associated
3978  // data. That would reduce startup time and code size. Registration
3979  // function serves as a COMDAT group key.
3980  llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
3981  RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3982  RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3983  RegFn->setComdat(ComdatKey);
3984  UnRegFn->setComdat(ComdatKey);
3985  DeviceImages->setComdat(ComdatKey);
3986  Desc->setComdat(ComdatKey);
3987  }
3988  return RegFn;
3989 }
3990 
3992  llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3993  llvm::GlobalValue::LinkageTypes Linkage) {
3994  StringRef Name = Addr->getName();
3995  llvm::Module &M = CGM.getModule();
3996  llvm::LLVMContext &C = M.getContext();
3997 
3998  // Create constant string with the name.
3999  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4000 
4001  std::string StringName = getName({"omp_offloading", "entry_name"});
4002  auto *Str = new llvm::GlobalVariable(
4003  M, StrPtrInit->getType(), /*isConstant=*/true,
4004  llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4005  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4006 
4007  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4008  llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4009  llvm::ConstantInt::get(CGM.SizeTy, Size),
4010  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4011  llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4012  std::string EntryName = getName({"omp_offloading", "entry", ""});
4013  llvm::GlobalVariable *Entry = createGlobalStruct(
4014  CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4015  Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4016 
 4017  // The entry has to be created in the section the linker expects it to be in.
4018  std::string Section = getName({"omp_offloading", "entries"});
4019  Entry->setSection(Section);
4020 }
4021 
4023  // Emit the offloading entries and metadata so that the device codegen side
4024  // can easily figure out what to emit. The produced metadata looks like
4025  // this:
4026  //
4027  // !omp_offload.info = !{!1, ...}
4028  //
 4029  // Right now we only generate metadata for functions that contain target
 4030  // regions.
4031 
4032  // If we do not have entries, we don't need to do anything.
4034  return;
4035 
4036  llvm::Module &M = CGM.getModule();
4037  llvm::LLVMContext &C = M.getContext();
4039  OrderedEntries(OffloadEntriesInfoManager.size());
4040  llvm::SmallVector<StringRef, 16> ParentFunctions(
4042 
4043  // Auxiliary methods to create metadata values and strings.
4044  auto &&GetMDInt = [this](unsigned V) {
4045  return llvm::ConstantAsMetadata::get(
4046  llvm::ConstantInt::get(CGM.Int32Ty, V));
4047  };
4048 
4049  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4050 
4051  // Create the offloading info metadata node.
4052  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4053 
 4054  // Create a function that emits metadata for each target region entry.
4055  auto &&TargetRegionMetadataEmitter =
4056  [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
4057  unsigned DeviceID, unsigned FileID, StringRef ParentName,
4058  unsigned Line,
4060  // Generate metadata for target regions. Each entry of this metadata
4061  // contains:
4062  // - Entry 0 -> Kind of this type of metadata (0).
4063  // - Entry 1 -> Device ID of the file where the entry was identified.
4064  // - Entry 2 -> File ID of the file where the entry was identified.
4065  // - Entry 3 -> Mangled name of the function where the entry was
4066  // identified.
4067  // - Entry 4 -> Line in the file where the entry was identified.
4068  // - Entry 5 -> Order the entry was created.
4069  // The first element of the metadata node is the kind.
4070  llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4071  GetMDInt(FileID), GetMDString(ParentName),
4072  GetMDInt(Line), GetMDInt(E.getOrder())};
4073 
4074  // Save this entry in the right position of the ordered entries array.
4075  OrderedEntries[E.getOrder()] = &E;
4076  ParentFunctions[E.getOrder()] = ParentName;
4077 
4078  // Add metadata to the named metadata node.
4079  MD->addOperand(llvm::MDNode::get(C, Ops));
4080  };
4081 
4083  TargetRegionMetadataEmitter);
4084 
 4085  // Create a function that emits metadata for each device global variable entry.
4086  auto &&DeviceGlobalVarMetadataEmitter =
4087  [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4088  MD](StringRef MangledName,
4090  &E) {
4091  // Generate metadata for global variables. Each entry of this metadata
4092  // contains:
4093  // - Entry 0 -> Kind of this type of metadata (1).
4094  // - Entry 1 -> Mangled name of the variable.
4095  // - Entry 2 -> Declare target kind.
4096  // - Entry 3 -> Order the entry was created.
4097  // The first element of the metadata node is the kind.
4098  llvm::Metadata *Ops[] = {
4099  GetMDInt(E.getKind()), GetMDString(MangledName),
4100  GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4101 
4102  // Save this entry in the right position of the ordered entries array.
4103  OrderedEntries[E.getOrder()] = &E;
4104 
4105  // Add metadata to the named metadata node.
4106  MD->addOperand(llvm::MDNode::get(C, Ops));
4107  };
4108 
4110  DeviceGlobalVarMetadataEmitter);
4111 
4112  for (const auto *E : OrderedEntries) {
4113  assert(E && "All ordered entries must exist!");
4114  if (const auto *CE =
4115  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4116  E)) {
4117  if (!CE->getID() || !CE->getAddress()) {
 4118  // Do not blame the entry if the parent function is not emitted.
4119  StringRef FnName = ParentFunctions[CE->getOrder()];
4120  if (!CGM.GetGlobalValue(FnName))
4121  continue;
4122  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4124  "Offloading entry for target region is incorrect: either the "
4125  "address or the ID is invalid.");
4126  CGM.getDiags().Report(DiagID);
4127  continue;
4128  }
4129  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4130  CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4131  } else if (const auto *CE =
4132  dyn_cast<OffloadEntriesInfoManagerTy::
4133  OffloadEntryInfoDeviceGlobalVar>(E)) {
4136  CE->getFlags());
4137  switch (Flags) {
4139  if (!CE->getAddress()) {
4140  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4142  "Offloading entry for declare target variable is incorrect: the "
4143  "address is invalid.");
4144  CGM.getDiags().Report(DiagID);
4145  continue;
4146  }
 4147  // The variable has no definition - no need to add the entry.
4148  if (CE->getVarSize().isZero())
4149  continue;
4150  break;
4151  }
4153  assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4154  (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4155  "Declaret target link address is set.");
4156  if (CGM.getLangOpts().OpenMPIsDevice)
4157  continue;
4158  if (!CE->getAddress()) {
4159  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4161  "Offloading entry for declare target variable is incorrect: the "
4162  "address is invalid.");
4163  CGM.getDiags().Report(DiagID);
4164  continue;
4165  }
4166  break;
4167  }
4168  createOffloadEntry(CE->getAddress(), CE->getAddress(),
4169  CE->getVarSize().getQuantity(), Flags,
4170  CE->getLinkage());
4171  } else {
4172  llvm_unreachable("Unsupported entry kind.");
4173  }
4174  }
4175 }
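// As a concrete illustration, a single target region inside a function 'foo'
// contributes one operand of the shape described above (placeholders in angle
// brackets):
// \code
//   !omp_offload.info = !{!0}
//   !0 = !{i32 0, i32 <device-id>, i32 <file-id>, !"foo", i32 <line>, i32 <order>}
// \endcode
// and a declare target variable contributes an operand of the form
// !{i32 1, !"<mangled name>", i32 <kind>, i32 <order>}.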
4176 
4177 /// Loads all the offload entries information from the host IR
4178 /// metadata.
 4180  // If we are in target mode, load the metadata from the host IR. This code has
 4181  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
4182 
4183  if (!CGM.getLangOpts().OpenMPIsDevice)
4184  return;
4185 
4186  if (CGM.getLangOpts().OMPHostIRFile.empty())
4187  return;
4188 
4189  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4190  if (auto EC = Buf.getError()) {
4191  CGM.getDiags().Report(diag::err_cannot_open_file)
4192  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4193  return;
4194  }
4195 
4196  llvm::LLVMContext C;
4197  auto ME = expectedToErrorOrAndEmitErrors(
4198  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4199 
4200  if (auto EC = ME.getError()) {
4201  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4202  DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4203  CGM.getDiags().Report(DiagID)
4204  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4205  return;
4206  }
4207 
4208  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4209  if (!MD)
4210  return;
4211 
4212  for (llvm::MDNode *MN : MD->operands()) {
4213  auto &&GetMDInt = [MN](unsigned Idx) {
4214  auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4215  return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4216  };
4217 
4218  auto &&GetMDString = [MN](unsigned Idx) {
4219  auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4220  return V->getString();
4221  };
4222 
4223  switch (GetMDInt(0)) {
4224  default:
4225  llvm_unreachable("Unexpected metadata!");
4226  break;
4230  /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4231  /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4232  /*Order=*/GetMDInt(5));
4233  break;
4237  /*MangledName=*/GetMDString(1),
4238  static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4239  /*Flags=*/GetMDInt(2)),
4240  /*Order=*/GetMDInt(3));
4241  break;
4242  }
4243  }
4244 }
4245 
4247  if (!KmpRoutineEntryPtrTy) {
4248  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4249  ASTContext &C = CGM.getContext();
4250  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4252  KmpRoutineEntryPtrQTy = C.getPointerType(
4253  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4254  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4255  }
4256 }
4257 
4259  // Make sure the type of the entry is already created. This is the type we
4260  // have to create:
4261  // struct __tgt_offload_entry{
4262  // void *addr; // Pointer to the offload entry info.
4263  // // (function or global)
4264  // char *name; // Name of the function or global.
 4265  // size_t size; // Size of the entry info (0 if it is a function).
4266  // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4267  // int32_t reserved; // Reserved, to use by the runtime library.
4268  // };
4269  if (TgtOffloadEntryQTy.isNull()) {
4270  ASTContext &C = CGM.getContext();
4271  RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4272  RD->startDefinition();
4273  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4275  addFieldToRecordDecl(C, RD, C.getSizeType());
4277  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4279  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4280  RD->completeDefinition();
4281  RD->addAttr(PackedAttr::CreateImplicit(C));
4283  }
4284  return TgtOffloadEntryQTy;
4285 }
4286 
4288  // These are the types we need to build:
4289  // struct __tgt_device_image{
4290  // void *ImageStart; // Pointer to the target code start.
4291  // void *ImageEnd; // Pointer to the target code end.
4292  // // We also add the host entries to the device image, as it may be useful
4293  // // for the target runtime to have access to that information.
4294  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
4295  // // the entries.
4296  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4297  // // entries (non inclusive).
4298  // };
4299  if (TgtDeviceImageQTy.isNull()) {
4300  ASTContext &C = CGM.getContext();
4301  RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4302  RD->startDefinition();
4303  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4304  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4307  RD->completeDefinition();
4309  }
4310  return TgtDeviceImageQTy;
4311 }
4312 
4314  // struct __tgt_bin_desc{
4315  // int32_t NumDevices; // Number of devices supported.
4316  // __tgt_device_image *DeviceImages; // Arrays of device images
4317  // // (one per device).
4318  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
4319  // // entries.
4320  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4321  // // entries (non inclusive).
4322  // };
4324  ASTContext &C = CGM.getContext();
4325  RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4326  RD->startDefinition();
4328  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4332  RD->completeDefinition();
4334  }
4335  return TgtBinaryDescriptorQTy;
4336 }
4337 
4338 namespace {
4339 struct PrivateHelpersTy {
4340  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4341  const VarDecl *PrivateElemInit)
4342  : Original(Original), PrivateCopy(PrivateCopy),
4343  PrivateElemInit(PrivateElemInit) {}
4344  const VarDecl *Original;
4345  const VarDecl *PrivateCopy;
4346  const VarDecl *PrivateElemInit;
4347 };
4348 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4349 } // anonymous namespace
4350 
4351 static RecordDecl *
4353  if (!Privates.empty()) {
4354  ASTContext &C = CGM.getContext();
4355  // Build struct .kmp_privates_t. {
4356  // /* private vars */
4357  // };
4358  RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4359  RD->startDefinition();
4360  for (const auto &Pair : Privates) {
4361  const VarDecl *VD = Pair.second.Original;
4362  QualType Type = VD->getType().getNonReferenceType();
4363  FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4364  if (VD->hasAttrs()) {
4365  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4366  E(VD->getAttrs().end());
4367  I != E; ++I)
4368  FD->addAttr(*I);
4369  }
4370  }
4371  RD->completeDefinition();
4372  return RD;
4373  }
4374  return nullptr;
4375 }
4376 
4377 static RecordDecl *
4379  QualType KmpInt32Ty,
4380  QualType KmpRoutineEntryPointerQTy) {
4381  ASTContext &C = CGM.getContext();
4382  // Build struct kmp_task_t {
4383  // void * shareds;
4384  // kmp_routine_entry_t routine;
4385  // kmp_int32 part_id;
4386  // kmp_cmplrdata_t data1;
4387  // kmp_cmplrdata_t data2;
4388  // For taskloops additional fields:
4389  // kmp_uint64 lb;
4390  // kmp_uint64 ub;
4391  // kmp_int64 st;
4392  // kmp_int32 liter;
4393  // void * reductions;
4394  // };
4395  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4396  UD->startDefinition();
4397  addFieldToRecordDecl(C, UD, KmpInt32Ty);
4398  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4399  UD->completeDefinition();
4400  QualType KmpCmplrdataTy = C.getRecordType(UD);
4401  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4402  RD->startDefinition();
4403  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4404  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4405  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4406  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4407  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4408  if (isOpenMPTaskLoopDirective(Kind)) {
4409  QualType KmpUInt64Ty =
4410  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4411  QualType KmpInt64Ty =
4412  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4413  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4414  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4415  addFieldToRecordDecl(C, RD, KmpInt64Ty);
4416  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4417  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4418  }
4419  RD->completeDefinition();
4420  return RD;
4421 }
4422 
4423 static RecordDecl *
4425  ArrayRef<PrivateDataTy> Privates) {
4426  ASTContext &C = CGM.getContext();
4427  // Build struct kmp_task_t_with_privates {
4428  // kmp_task_t task_data;
4429  // .kmp_privates_t. privates;
4430  // };
4431  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4432  RD->startDefinition();
4433  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4434  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4435  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4436  RD->completeDefinition();
4437  return RD;
4438 }
4439 
4440 /// Emit a proxy function which accepts kmp_task_t as the second
4441 /// argument.
4442 /// \code
4443 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4444 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4445 /// For taskloops:
4446 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4447 /// tt->reductions, tt->shareds);
4448 /// return 0;
4449 /// }
4450 /// \endcode
4451 static llvm::Function *
4453  OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4454  QualType KmpTaskTWithPrivatesPtrQTy,
4455  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4456  QualType SharedsPtrTy, llvm::Function *TaskFunction,
4457  llvm::Value *TaskPrivatesMap) {
4458  ASTContext &C = CGM.getContext();
4459  FunctionArgList Args;
4460  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4462  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4463  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4465  Args.push_back(&GtidArg);
4466  Args.push_back(&TaskTypeArg);
4467  const auto &TaskEntryFnInfo =
4468  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4469  llvm::FunctionType *TaskEntryTy =
4470  CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4471  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4472  auto *TaskEntry = llvm::Function::Create(
4473  TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4474  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4475  TaskEntry->setDoesNotRecurse();
4476  CodeGenFunction CGF(CGM);
4477  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4478  Loc, Loc);
4479 
4480  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4481  // tt,
4482  // For taskloops:
4483  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4484  // tt->task_data.shareds);
4485  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4486  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4487  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4488  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4489  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4490  const auto *KmpTaskTWithPrivatesQTyRD =
4491  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4492  LValue Base =
4493  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4494  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4495  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4496  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4497  llvm::Value *PartidParam = PartIdLVal.getPointer();
4498 
4499  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4500  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4501  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4502  CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4503  CGF.ConvertTypeForMem(SharedsPtrTy));
4504 
4505  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4506  llvm::Value *PrivatesParam;
4507  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4508  LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4509  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4510  PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4511  } else {
4512  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4513  }
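// When the task captures no private/firstprivate/lastprivate data there is no
// privates field at all, so a plain null void* is forwarded in its place.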
4514 
4515  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4516  TaskPrivatesMap,
4517  CGF.Builder
4518  .CreatePointerBitCastOrAddrSpaceCast(
4519  TDBase.getAddress(), CGF.VoidPtrTy)
4520  .getPointer()};
4521  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4522  std::end(CommonArgs));
4523  if (isOpenMPTaskLoopDirective(Kind)) {
4524  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4525  LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4526  llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4527  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4528  LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4529  llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4530  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4531  LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4532  llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4533  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4534  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4535  llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4536  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4537  LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4538  llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4539  CallArgs.push_back(LBParam);
4540  CallArgs.push_back(UBParam);
4541  CallArgs.push_back(StParam);
4542  CallArgs.push_back(LIParam);
4543  CallArgs.push_back(RParam);
4544  }
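// For taskloops the proxy additionally forwards, in this order, the lower and
// upper bounds, the stride, the last-iteration flag, and the reduction data
// loaded from kmp_task_t, ahead of the shareds pointer pushed below.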
4545  CallArgs.push_back(SharedsParam);
4546 
4547  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4548  CallArgs);
4549  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4550  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4551  CGF.FinishFunction();
4552  return TaskEntry;
4553 }
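// The entry emitted above matches the runtime's task-routine shape, which (as
// a sketch of the libomp convention assumed here, not a definition taken from
// this file) is roughly:
//
//   typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32 gtid, void *task);
//
// so that, once the task is scheduled, the runtime can call the stored routine
// with the global thread id and the kmp_task_t pointer; the entry then unpacks
// the task and forwards everything to the outlined TaskFunction.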
4554 
4555 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4556  SourceLocation Loc,
4557  QualType KmpInt32Ty,
4558  QualType KmpTaskTWithPrivatesPtrQTy,
4559  QualType KmpTaskTWithPrivatesQTy) {
4560  ASTContext &C = CGM.getContext();
4561  FunctionArgList Args;
4562  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4563  ImplicitParamDecl::Other);
4564  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4565  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4566  ImplicitParamDecl::Other);
4567  Args.push_back(&GtidArg);
4568  Args.push_back(&TaskTypeArg);
4569  const auto &DestructorFnInfo =
4570  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4571  llvm::FunctionType *DestructorFnTy =
4572  CGM.getTypes().GetFunctionType(DestructorFnInfo);
4573  std::string Name =
4574  CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4575  auto *DestructorFn =
4576  llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4577  Name, &CGM.getModule());
4578  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4579  DestructorFnInfo);
4580  DestructorFn->setDoesNotRecurse();
4581  CodeGenFunction CGF(CGM);
4582  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4583  Args, Loc, Loc);
4584 
4585  LValue Base = CGF.EmitLoadOfPointerLValue(
4586  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4587  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4588  const auto *KmpTaskTWithPrivatesQTyRD =
4589  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4590  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4591  Base = CGF.EmitLValueForField(Base, *FI);
4592  for (const auto *Field :
4593  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4594  if (QualType::DestructionKind DtorKind =
4595  Field->getType().isDestructedType()) {
4596  LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4597  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4598  }
4599  }
4600  CGF.FinishFunction();
4601  return DestructorFn;
4602 }
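// The destructor proxy above walks the privates record and schedules a
// destructor call for every field whose type requires non-trivial destruction;
// the kmp_task_t part itself holds only trivially destructible scalar and
// pointer fields.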
4603 
4604 /// Emit a privates mapping function for correct handling of private,
4605 /// firstprivate, and lastprivate variables.
4606 /// \code
4607 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4608 /// **noalias priv1,..., <tyn> **noalias privn) {
4609 /// *priv1 = &.privates.priv1;
4610 /// ...;
4611 /// *privn = &.privates.privn;
4612 /// }
4613 /// \endcode
4614 static llvm::Value *
4615 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
4616  ArrayRef<const Expr *> PrivateVars,
4617  ArrayRef<const Expr *> FirstprivateVars,
4618  ArrayRef<const Expr *> LastprivateVars,
4619  QualType PrivatesQTy,
4620  ArrayRef<PrivateDataTy> Privates) {
4621  ASTContext &C = CGM.getContext();
4622  FunctionArgList Args;
4623  ImplicitParamDecl TaskPrivatesArg(
4624  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4625  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4626  ImplicitParamDecl::Other);
4627  Args.push_back(&TaskPrivatesArg);
4628  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4629  unsigned Counter = 1;
4630  for (const Expr *E : PrivateVars) {
4631  Args.push_back(ImplicitParamDecl::Create(
4632  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4633  C.getPointerType(C.getPointerType(E->getType()))
4634  .withConst()
4635  .withRestrict(),
4636  ImplicitParamDecl::Other));
4637  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4638  PrivateVarsPos[VD] = Counter;
4639  ++Counter;
4640  }
4641  for (const Expr *E : FirstprivateVars) {
4642  Args.push_back(ImplicitParamDecl::Create(
4643  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4644  C.getPointerType(C.getPointerType(E->getType()))
4645  .withConst()
4646  .withRestrict(),
4647  ImplicitParamDecl::Other));
4648  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4649  PrivateVarsPos[VD] = Counter;
4650  ++Counter;
4651  }
4652  for (const Expr *E : LastprivateVars) {
4653  Args.push_back(ImplicitParamDecl::Create(
4654  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4655  C.getPointerType(C.getPointerType(E->getType()))
4656  .withConst()
4657  .withRestrict(),
4658  ImplicitParamDecl::Other));
4659  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4660  PrivateVarsPos[VD] = Counter;
4661  ++Counter;
4662  }
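// Each of the three loops above adds one pointer-to-pointer out-parameter per
// captured variable (matching the "<tyn> **noalias privn" parameters in the
// \code sketch) and records the variable's argument position in PrivateVarsPos
// so the matching field of the privates record can be written through it
// below.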
4663  const auto &TaskPrivatesMapFnInfo =
4664  CGM.