clang  8.0.0svn
CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CGRecordLayout.h"
18 #include "CodeGenFunction.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/StmtOpenMP.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/Bitcode/BitcodeReader.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalValue.h"
28 #include "llvm/IR/Value.h"
29 #include "llvm/Support/Format.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <cassert>
32 
33 using namespace clang;
34 using namespace CodeGen;
35 
36 namespace {
37 /// Base class for handling code generation inside OpenMP regions.
38 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
39 public:
40  /// Kinds of OpenMP regions used in codegen.
41  enum CGOpenMPRegionKind {
42  /// Region with outlined function for standalone 'parallel'
43  /// directive.
44  ParallelOutlinedRegion,
45  /// Region with outlined function for standalone 'task' directive.
46  TaskOutlinedRegion,
47  /// Region for constructs that do not require function outlining,
48  /// like 'for', 'sections', 'atomic' etc. directives.
49  InlinedRegion,
50  /// Region with outlined function for standalone 'target' directive.
51  TargetRegion,
52  };
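  // Illustrative note (not in the original source): a combined construct such as
  // '#pragma omp parallel for' typically uses ParallelOutlinedRegion for the
  // outlined 'parallel' part and InlinedRegion for the 'for' worksharing part
  // emitted inside it.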
53 
54  CGOpenMPRegionInfo(const CapturedStmt &CS,
 55  const CGOpenMPRegionKind RegionKind,
 56  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
 57  bool HasCancel)
58  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
59  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
60 
 61  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
 62  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
 63  bool HasCancel)
64  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
65  Kind(Kind), HasCancel(HasCancel) {}
66 
67  /// Get a variable or parameter for storing global thread id
68  /// inside OpenMP construct.
69  virtual const VarDecl *getThreadIDVariable() const = 0;
70 
71  /// Emit the captured statement body.
72  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
73 
74  /// Get an LValue for the current ThreadID variable.
75  /// \return LValue for thread id variable. This LValue always has type int32*.
76  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
77 
78  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
79 
80  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
81 
82  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
83 
84  bool hasCancel() const { return HasCancel; }
85 
86  static bool classof(const CGCapturedStmtInfo *Info) {
87  return Info->getKind() == CR_OpenMP;
88  }
89 
90  ~CGOpenMPRegionInfo() override = default;
91 
92 protected:
93  CGOpenMPRegionKind RegionKind;
 94  RegionCodeGenTy CodeGen;
 95  OpenMPDirectiveKind Kind;
 96  bool HasCancel;
97 };
98 
99 /// API for captured statement code generation in OpenMP constructs.
100 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
101 public:
102  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
103  const RegionCodeGenTy &CodeGen,
104  OpenMPDirectiveKind Kind, bool HasCancel,
105  StringRef HelperName)
106  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
107  HasCancel),
108  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
109  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
110  }
111 
112  /// Get a variable or parameter for storing global thread id
113  /// inside OpenMP construct.
114  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
115 
116  /// Get the name of the capture helper.
117  StringRef getHelperName() const override { return HelperName; }
118 
119  static bool classof(const CGCapturedStmtInfo *Info) {
120  return CGOpenMPRegionInfo::classof(Info) &&
121  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
122  ParallelOutlinedRegion;
123  }
124 
125 private:
126  /// A variable or parameter storing global thread id for OpenMP
127  /// constructs.
128  const VarDecl *ThreadIDVar;
129  StringRef HelperName;
130 };
131 
132 /// API for captured statement code generation in OpenMP constructs.
133 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
134 public:
135  class UntiedTaskActionTy final : public PrePostActionTy {
136  bool Untied;
137  const VarDecl *PartIDVar;
138  const RegionCodeGenTy UntiedCodeGen;
139  llvm::SwitchInst *UntiedSwitch = nullptr;
140 
141  public:
142  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
143  const RegionCodeGenTy &UntiedCodeGen)
144  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
145  void Enter(CodeGenFunction &CGF) override {
146  if (Untied) {
147  // Emit task switching point.
148  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
149  CGF.GetAddrOfLocalVar(PartIDVar),
150  PartIDVar->getType()->castAs<PointerType>());
151  llvm::Value *Res =
152  CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
153  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
154  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
 155  CGF.EmitBlock(DoneBB);
 156  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
 157  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
158  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
159  CGF.Builder.GetInsertBlock());
160  emitUntiedSwitch(CGF);
161  }
162  }
163  void emitUntiedSwitch(CodeGenFunction &CGF) const {
164  if (Untied) {
165  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
166  CGF.GetAddrOfLocalVar(PartIDVar),
167  PartIDVar->getType()->castAs<PointerType>());
168  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
169  PartIdLVal);
170  UntiedCodeGen(CGF);
171  CodeGenFunction::JumpDest CurPoint =
 172  CGF.getJumpDestInCurrentScope(".untied.next.");
 173  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
 174  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
175  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
176  CGF.Builder.GetInsertBlock());
177  CGF.EmitBranchThroughCleanup(CurPoint);
178  CGF.EmitBlock(CurPoint.getBlock());
179  }
180  }
181  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
182  };
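  // Illustrative sketch (not part of the original source) of the structure the
  // action above builds for an untied task body with one task scheduling point:
  //
  //   switch (*part_id) {            // created in Enter()
  //   default: goto .untied.done.;   // task already finished
  //   case 0:  ...body up to the scheduling point...
  //            *part_id = 1; <UntiedCodeGen re-enqueues the task>; return;
  //   case 1:  ...rest of the body...
  //   }
  //
  // Each emitUntiedSwitch() call adds one case that resumes execution right
  // after the corresponding scheduling point.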
183  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
184  const VarDecl *ThreadIDVar,
185  const RegionCodeGenTy &CodeGen,
186  OpenMPDirectiveKind Kind, bool HasCancel,
187  const UntiedTaskActionTy &Action)
188  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
189  ThreadIDVar(ThreadIDVar), Action(Action) {
190  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
191  }
192 
193  /// Get a variable or parameter for storing global thread id
194  /// inside OpenMP construct.
195  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
196 
197  /// Get an LValue for the current ThreadID variable.
198  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
199 
200  /// Get the name of the capture helper.
201  StringRef getHelperName() const override { return ".omp_outlined."; }
202 
203  void emitUntiedSwitch(CodeGenFunction &CGF) override {
204  Action.emitUntiedSwitch(CGF);
205  }
206 
207  static bool classof(const CGCapturedStmtInfo *Info) {
208  return CGOpenMPRegionInfo::classof(Info) &&
209  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
210  TaskOutlinedRegion;
211  }
212 
213 private:
214  /// A variable or parameter storing global thread id for OpenMP
215  /// constructs.
216  const VarDecl *ThreadIDVar;
217  /// Action for emitting code for untied tasks.
218  const UntiedTaskActionTy &Action;
219 };
220 
221 /// API for inlined captured statement code generation in OpenMP
222 /// constructs.
223 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
224 public:
225  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
226  const RegionCodeGenTy &CodeGen,
227  OpenMPDirectiveKind Kind, bool HasCancel)
228  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
229  OldCSI(OldCSI),
230  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
231 
232  // Retrieve the value of the context parameter.
233  llvm::Value *getContextValue() const override {
234  if (OuterRegionInfo)
235  return OuterRegionInfo->getContextValue();
236  llvm_unreachable("No context value for inlined OpenMP region");
237  }
238 
239  void setContextValue(llvm::Value *V) override {
240  if (OuterRegionInfo) {
241  OuterRegionInfo->setContextValue(V);
242  return;
243  }
244  llvm_unreachable("No context value for inlined OpenMP region");
245  }
246 
247  /// Lookup the captured field decl for a variable.
248  const FieldDecl *lookup(const VarDecl *VD) const override {
249  if (OuterRegionInfo)
250  return OuterRegionInfo->lookup(VD);
 251  // If there is no outer outlined region, there is no need to look the
 252  // variable up in a list of captured variables; the original one can be used.
253  return nullptr;
254  }
255 
256  FieldDecl *getThisFieldDecl() const override {
257  if (OuterRegionInfo)
258  return OuterRegionInfo->getThisFieldDecl();
259  return nullptr;
260  }
261 
262  /// Get a variable or parameter for storing global thread id
263  /// inside OpenMP construct.
264  const VarDecl *getThreadIDVariable() const override {
265  if (OuterRegionInfo)
266  return OuterRegionInfo->getThreadIDVariable();
267  return nullptr;
268  }
269 
270  /// Get an LValue for the current ThreadID variable.
271  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
272  if (OuterRegionInfo)
273  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
274  llvm_unreachable("No LValue for inlined OpenMP construct");
275  }
276 
277  /// Get the name of the capture helper.
278  StringRef getHelperName() const override {
279  if (auto *OuterRegionInfo = getOldCSI())
280  return OuterRegionInfo->getHelperName();
281  llvm_unreachable("No helper name for inlined OpenMP construct");
282  }
283 
284  void emitUntiedSwitch(CodeGenFunction &CGF) override {
285  if (OuterRegionInfo)
286  OuterRegionInfo->emitUntiedSwitch(CGF);
287  }
288 
289  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
290 
291  static bool classof(const CGCapturedStmtInfo *Info) {
292  return CGOpenMPRegionInfo::classof(Info) &&
293  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
294  }
295 
296  ~CGOpenMPInlinedRegionInfo() override = default;
297 
298 private:
 299  /// CodeGen info about outer OpenMP region.
 300  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
 301  CGOpenMPRegionInfo *OuterRegionInfo;
302 };
303 
304 /// API for captured statement code generation in OpenMP target
 305 /// constructs. For these captures, implicit parameters are used instead of the
 306 /// captured fields. The name of the target region has to be unique in a given
 307 /// application, so it is provided by the client, which alone has the
 308 /// information needed to generate it.
309 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
310 public:
311  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
312  const RegionCodeGenTy &CodeGen, StringRef HelperName)
313  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
314  /*HasCancel=*/false),
315  HelperName(HelperName) {}
316 
317  /// This is unused for target regions because each starts executing
318  /// with a single thread.
319  const VarDecl *getThreadIDVariable() const override { return nullptr; }
320 
321  /// Get the name of the capture helper.
322  StringRef getHelperName() const override { return HelperName; }
323 
324  static bool classof(const CGCapturedStmtInfo *Info) {
325  return CGOpenMPRegionInfo::classof(Info) &&
326  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
327  }
328 
329 private:
330  StringRef HelperName;
331 };
332 
333 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
334  llvm_unreachable("No codegen for expressions");
335 }
 336 /// API for generation of expressions captured in an innermost OpenMP
337 /// region.
338 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
339 public:
340  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
341  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
342  OMPD_unknown,
343  /*HasCancel=*/false),
344  PrivScope(CGF) {
345  // Make sure the globals captured in the provided statement are local by
346  // using the privatization logic. We assume the same variable is not
347  // captured more than once.
348  for (const auto &C : CS.captures()) {
349  if (!C.capturesVariable() && !C.capturesVariableByCopy())
350  continue;
351 
352  const VarDecl *VD = C.getCapturedVar();
353  if (VD->isLocalVarDeclOrParm())
354  continue;
355 
356  DeclRefExpr DRE(const_cast<VarDecl *>(VD),
 357  /*RefersToEnclosingVariableOrCapture=*/false,
 358  VD->getType().getNonReferenceType(), VK_LValue,
 359  C.getLocation());
360  PrivScope.addPrivate(
361  VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
362  }
363  (void)PrivScope.Privatize();
364  }
365 
366  /// Lookup the captured field decl for a variable.
367  const FieldDecl *lookup(const VarDecl *VD) const override {
368  if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
369  return FD;
370  return nullptr;
371  }
372 
373  /// Emit the captured statement body.
374  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
375  llvm_unreachable("No body for expressions");
376  }
377 
378  /// Get a variable or parameter for storing global thread id
379  /// inside OpenMP construct.
380  const VarDecl *getThreadIDVariable() const override {
381  llvm_unreachable("No thread id for expressions");
382  }
383 
384  /// Get the name of the capture helper.
385  StringRef getHelperName() const override {
386  llvm_unreachable("No helper name for expressions");
387  }
388 
389  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
390 
391 private:
 392  /// Private scope to capture global variables.
 393  CodeGenFunction::OMPPrivateScope PrivScope;
 394 };
395 
396 /// RAII for emitting code of OpenMP constructs.
397 class InlinedOpenMPRegionRAII {
398  CodeGenFunction &CGF;
399  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
400  FieldDecl *LambdaThisCaptureField = nullptr;
401  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
402 
403 public:
404  /// Constructs region for combined constructs.
405  /// \param CodeGen Code generation sequence for combined directives. Includes
406  /// a list of functions used for code generation of implicitly inlined
407  /// regions.
408  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
409  OpenMPDirectiveKind Kind, bool HasCancel)
410  : CGF(CGF) {
411  // Start emission for the construct.
412  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
413  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
414  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
415  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
416  CGF.LambdaThisCaptureField = nullptr;
417  BlockInfo = CGF.BlockInfo;
418  CGF.BlockInfo = nullptr;
419  }
420 
421  ~InlinedOpenMPRegionRAII() {
422  // Restore original CapturedStmtInfo only if we're done with code emission.
423  auto *OldCSI =
424  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
425  delete CGF.CapturedStmtInfo;
426  CGF.CapturedStmtInfo = OldCSI;
427  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
428  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
429  CGF.BlockInfo = BlockInfo;
430  }
431 };
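// Typical usage of the RAII above (simplified sketch, not from this file):
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CGF.EmitStmt(Body); // emitted with the inlined region info installed
//   } // destructor restores the previous CapturedStmtInfo and lambda/block info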
432 
433 /// Values for bit flags used in the ident_t to describe the fields.
 434 /// All enumerated elements are named and described in accordance with the code
435 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
436 enum OpenMPLocationFlags : unsigned {
437  /// Use trampoline for internal microtask.
438  OMP_IDENT_IMD = 0x01,
439  /// Use c-style ident structure.
440  OMP_IDENT_KMPC = 0x02,
441  /// Atomic reduction option for kmpc_reduce.
442  OMP_ATOMIC_REDUCE = 0x10,
443  /// Explicit 'barrier' directive.
444  OMP_IDENT_BARRIER_EXPL = 0x20,
445  /// Implicit barrier in code.
446  OMP_IDENT_BARRIER_IMPL = 0x40,
447  /// Implicit barrier in 'for' directive.
448  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
449  /// Implicit barrier in 'sections' directive.
450  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
451  /// Implicit barrier in 'single' directive.
452  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
453  /// Call of __kmp_for_static_init for static loop.
454  OMP_IDENT_WORK_LOOP = 0x200,
455  /// Call of __kmp_for_static_init for sections.
456  OMP_IDENT_WORK_SECTIONS = 0x400,
457  /// Call of __kmp_for_static_init for distribute.
458  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
459  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
460 };
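// For example (illustrative): the ident_t emitted for an implicit barrier at the
// end of a worksharing 'for' would carry OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR
// in its 'flags' field; OMP_IDENT_KMPC itself is OR-ed in unconditionally by
// emitUpdateLocation() below.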
461 
462 /// Describes ident structure that describes a source location.
463 /// All descriptions are taken from
464 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
465 /// Original structure:
466 /// typedef struct ident {
467 /// kmp_int32 reserved_1; /**< might be used in Fortran;
468 /// see above */
469 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
470 /// KMP_IDENT_KMPC identifies this union
471 /// member */
472 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
473 /// see above */
474 ///#if USE_ITT_BUILD
475 /// /* but currently used for storing
476 /// region-specific ITT */
477 /// /* contextual information. */
478 ///#endif /* USE_ITT_BUILD */
479 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
480 /// C++ */
481 /// char const *psource; /**< String describing the source location.
482 /// The string is composed of semi-colon separated
483 // fields which describe the source file,
484 /// the function and a pair of line numbers that
485 /// delimit the construct.
486 /// */
487 /// } ident_t;
488 enum IdentFieldIndex {
 489  /// might be used in Fortran
 490  IdentField_Reserved_1,
 491  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
 492  IdentField_Flags,
 493  /// Not really used in Fortran any more
 494  IdentField_Reserved_2,
 495  /// Source[4] in Fortran, do not use for C++
 496  IdentField_Reserved_3,
 497  /// String describing the source location. The string is composed of
 498  /// semi-colon separated fields which describe the source file, the function
 499  /// and a pair of line numbers that delimit the construct.
 500  IdentField_PSource
501 };
502 
503 /// Schedule types for 'omp for' loops (these enumerators are taken from
504 /// the enum sched_type in kmp.h).
505 enum OpenMPSchedType {
 506  /// Lower bound for default (unordered) versions.
 507  OMP_sch_lower = 32,
 508  OMP_sch_static_chunked = 33,
 509  OMP_sch_static = 34,
 510  OMP_sch_dynamic_chunked = 35,
 511  OMP_sch_guided_chunked = 36,
 512  OMP_sch_runtime = 37,
 513  OMP_sch_auto = 38,
 514  /// static with chunk adjustment (e.g., simd)
 515  OMP_sch_static_balanced_chunked = 45,
 516  /// Lower bound for 'ordered' versions.
 517  OMP_ord_lower = 64,
 518  OMP_ord_static_chunked = 65,
 519  OMP_ord_static = 66,
 520  OMP_ord_dynamic_chunked = 67,
 521  OMP_ord_guided_chunked = 68,
 522  OMP_ord_runtime = 69,
 523  OMP_ord_auto = 70,
 524  OMP_sch_default = OMP_sch_static,
 525  /// dist_schedule types
 526  OMP_dist_sch_static_chunked = 91,
 527  OMP_dist_sch_static = 92,
 528  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
 529  /// Set if the monotonic schedule modifier was present.
 530  OMP_sch_modifier_monotonic = (1 << 29),
 531  /// Set if the nonmonotonic schedule modifier was present.
 532  OMP_sch_modifier_nonmonotonic = (1 << 30),
533 };
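// Illustrative mapping from 'schedule' clauses to the values above:
// 'schedule(static)' with no chunk typically lowers to OMP_sch_static,
// 'schedule(static, N)' to OMP_sch_static_chunked, and 'schedule(dynamic, N)'
// to OMP_sch_dynamic_chunked; 'ordered' loops use the OMP_ord_* counterparts.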
534 
535 enum OpenMPRTLFunction {
 536  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
537  /// kmpc_micro microtask, ...);
539  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
540  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
542  /// Call to void __kmpc_threadprivate_register( ident_t *,
543  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
545  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
547  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
548  // kmp_critical_name *crit);
550  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
551  // global_tid, kmp_critical_name *crit, uintptr_t hint);
553  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
554  // kmp_critical_name *crit);
556  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
557  // global_tid);
559  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
561  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
563  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
564  // global_tid);
566  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
567  // global_tid);
569  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
570  // kmp_int32 num_threads);
572  // Call to void __kmpc_flush(ident_t *loc);
574  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
576  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
578  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
579  // int end_part);
581  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
583  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
585  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
586  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
587  // kmp_routine_entry_t *task_entry);
589  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
590  // new_task);
592  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
593  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
594  // kmp_int32 didit);
596  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
597  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
598  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
600  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
601  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
602  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
603  // *lck);
605  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
606  // kmp_critical_name *lck);
608  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
609  // kmp_critical_name *lck);
611  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
612  // kmp_task_t * new_task);
614  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
615  // kmp_task_t * new_task);
617  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
619  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
621  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
622  // global_tid);
624  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
626  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
628  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
629  // int proc_bind);
631  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
632  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
633  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
635  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
636  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
637  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
639  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
640  // global_tid, kmp_int32 cncl_kind);
642  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
643  // kmp_int32 cncl_kind);
645  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
646  // kmp_int32 num_teams, kmp_int32 thread_limit);
648  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
649  // microtask, ...);
651  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
652  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
653  // sched, kmp_uint64 grainsize, void *task_dup);
655  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
656  // num_dims, struct kmp_dim *dims);
658  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
660  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
661  // *vec);
663  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
664  // *vec);
666  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
667  // *data);
669  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
670  // *d);
672 
673  //
674  // Offloading related calls
675  //
676  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
677  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
678  // *arg_types);
680  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
681  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
682  // *arg_types);
684  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
685  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
686  // *arg_types, int32_t num_teams, int32_t thread_limit);
688  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
689  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
690  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
692  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
694  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
696  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
697  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
699  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
700  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
701  // *arg_types);
703  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
704  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
706  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
707  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
708  // *arg_types);
710  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
711  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
713  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
714  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
715  // *arg_types);
717 };
718 
719 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
720 /// region.
721 class CleanupTy final : public EHScopeStack::Cleanup {
722  PrePostActionTy *Action;
723 
724 public:
725  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
726  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
727  if (!CGF.HaveInsertPoint())
728  return;
729  Action->Exit(CGF);
730  }
731 };
732 
733 } // anonymous namespace
734 
735 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
 736  CodeGenFunction::RunCleanupsScope Scope(CGF);
 737  if (PrePostAction) {
738  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
739  Callback(CodeGen, CGF, *PrePostAction);
740  } else {
741  PrePostActionTy Action;
742  Callback(CodeGen, CGF, Action);
743  }
744 }
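// Design note: the Exit() hook is pushed as a NormalAndEHCleanup rather than
// called after Callback, so the post-action also runs when the region body
// leaves through an exception or another cleanup path.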
745 
 746 /// Check if the combiner is a call to a UDR combiner and, if so, return the
 747 /// UDR decl used for reduction.
748 static const OMPDeclareReductionDecl *
749 getReductionInit(const Expr *ReductionOp) {
750  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
751  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
752  if (const auto *DRE =
753  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
754  if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
755  return DRD;
756  return nullptr;
757 }
758 
759 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
 760  const OMPDeclareReductionDecl *DRD,
761  const Expr *InitOp,
762  Address Private, Address Original,
763  QualType Ty) {
764  if (DRD->getInitializer()) {
765  std::pair<llvm::Function *, llvm::Function *> Reduction =
766  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
767  const auto *CE = cast<CallExpr>(InitOp);
768  const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
769  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
770  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
771  const auto *LHSDRE =
772  cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
773  const auto *RHSDRE =
774  cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
775  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
776  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
777  [=]() { return Private; });
778  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
779  [=]() { return Original; });
780  (void)PrivateScope.Privatize();
781  RValue Func = RValue::get(Reduction.second);
782  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
783  CGF.EmitIgnoredExpr(InitOp);
784  } else {
785  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
786  std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
787  auto *GV = new llvm::GlobalVariable(
788  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
789  llvm::GlobalValue::PrivateLinkage, Init, Name);
790  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
791  RValue InitRVal;
792  switch (CGF.getEvaluationKind(Ty)) {
793  case TEK_Scalar:
794  InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
795  break;
796  case TEK_Complex:
 797  InitRVal =
 798  RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
 799  break;
800  case TEK_Aggregate:
801  InitRVal = RValue::getAggregate(LV.getAddress());
802  break;
803  }
804  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
805  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
806  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
807  /*IsInitializer=*/false);
808  }
809 }
810 
811 /// Emit initialization of arrays of complex types.
812 /// \param DestAddr Address of the array.
813 /// \param Type Type of array.
814 /// \param Init Initial expression of array.
815 /// \param SrcAddr Address of the original array.
816 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
817  QualType Type, bool EmitDeclareReductionInit,
818  const Expr *Init,
819  const OMPDeclareReductionDecl *DRD,
820  Address SrcAddr = Address::invalid()) {
821  // Perform element-by-element initialization.
822  QualType ElementTy;
823 
824  // Drill down to the base element type on both arrays.
825  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
826  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
827  DestAddr =
828  CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
829  if (DRD)
830  SrcAddr =
831  CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
832 
833  llvm::Value *SrcBegin = nullptr;
834  if (DRD)
835  SrcBegin = SrcAddr.getPointer();
836  llvm::Value *DestBegin = DestAddr.getPointer();
837  // Cast from pointer to array type to pointer to single element.
838  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
839  // The basic structure here is a while-do loop.
840  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
841  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
842  llvm::Value *IsEmpty =
843  CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
844  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
845 
846  // Enter the loop body, making that address the current address.
847  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
848  CGF.EmitBlock(BodyBB);
849 
850  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
851 
852  llvm::PHINode *SrcElementPHI = nullptr;
853  Address SrcElementCurrent = Address::invalid();
854  if (DRD) {
855  SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
856  "omp.arraycpy.srcElementPast");
857  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
858  SrcElementCurrent =
859  Address(SrcElementPHI,
860  SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
861  }
862  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
863  DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
864  DestElementPHI->addIncoming(DestBegin, EntryBB);
865  Address DestElementCurrent =
866  Address(DestElementPHI,
867  DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
868 
869  // Emit copy.
870  {
871  CodeGenFunction::RunCleanupsScope InitScope(CGF);
872  if (EmitDeclareReductionInit) {
873  emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
874  SrcElementCurrent, ElementTy);
875  } else
876  CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
877  /*IsInitializer=*/false);
878  }
879 
880  if (DRD) {
881  // Shift the address forward by one element.
882  llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
883  SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
884  SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
885  }
886 
887  // Shift the address forward by one element.
888  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
889  DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
890  // Check whether we've reached the end.
891  llvm::Value *Done =
892  CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
893  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
894  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
895 
896  // Done.
897  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
898 }
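// Rough shape of the initialization loop emitted above (illustrative):
//   if (destBegin == destEnd) goto omp.arrayinit.done;
//   omp.arrayinit.body:
//     <initialize *destElement from the UDR initializer or from Init>
//     advance destElement (and srcElement when a UDR is used);
//     branch back to omp.arrayinit.body until destElement reaches destEnd;
//   omp.arrayinit.done: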
899 
900 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
901  return CGF.EmitOMPSharedLValue(E);
902 }
903 
904 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
905  const Expr *E) {
906  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
907  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
908  return LValue();
909 }
910 
911 void ReductionCodeGen::emitAggregateInitialization(
912  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
913  const OMPDeclareReductionDecl *DRD) {
914  // Emit VarDecl with copy init for arrays.
915  // Get the address of the original variable captured in current
916  // captured region.
917  const auto *PrivateVD =
918  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
919  bool EmitDeclareReductionInit =
920  DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
921  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
922  EmitDeclareReductionInit,
923  EmitDeclareReductionInit ? ClausesData[N].ReductionOp
924  : PrivateVD->getInit(),
925  DRD, SharedLVal.getAddress());
926 }
927 
928 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
 929  ArrayRef<const Expr *> Privates,
 930  ArrayRef<const Expr *> ReductionOps) {
931  ClausesData.reserve(Shareds.size());
932  SharedAddresses.reserve(Shareds.size());
933  Sizes.reserve(Shareds.size());
934  BaseDecls.reserve(Shareds.size());
935  auto IPriv = Privates.begin();
936  auto IRed = ReductionOps.begin();
937  for (const Expr *Ref : Shareds) {
938  ClausesData.emplace_back(Ref, *IPriv, *IRed);
939  std::advance(IPriv, 1);
940  std::advance(IRed, 1);
941  }
942 }
943 
944 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
945  assert(SharedAddresses.size() == N &&
946  "Number of generated lvalues must be exactly N.");
947  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
948  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
949  SharedAddresses.emplace_back(First, Second);
950 }
951 
952 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
 953  const auto *PrivateVD =
954  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
955  QualType PrivateType = PrivateVD->getType();
956  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
957  if (!PrivateType->isVariablyModifiedType()) {
958  Sizes.emplace_back(
959  CGF.getTypeSize(
960  SharedAddresses[N].first.getType().getNonReferenceType()),
961  nullptr);
962  return;
963  }
964  llvm::Value *Size;
965  llvm::Value *SizeInChars;
966  auto *ElemType =
967  cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
968  ->getElementType();
969  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
970  if (AsArraySection) {
971  Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
972  SharedAddresses[N].first.getPointer());
973  Size = CGF.Builder.CreateNUWAdd(
974  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
975  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
976  } else {
977  SizeInChars = CGF.getTypeSize(
978  SharedAddresses[N].first.getType().getNonReferenceType());
979  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
980  }
 981  Sizes.emplace_back(SizeInChars, Size);
 982  CodeGenFunction::OpaqueValueMapping OpaqueMap(
 983  CGF,
984  cast<OpaqueValueExpr>(
985  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
986  RValue::get(Size));
987  CGF.EmitVariablyModifiedType(PrivateType);
988 }
989 
990 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
 991  llvm::Value *Size) {
992  const auto *PrivateVD =
993  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
994  QualType PrivateType = PrivateVD->getType();
995  if (!PrivateType->isVariablyModifiedType()) {
996  assert(!Size && !Sizes[N].second &&
997  "Size should be nullptr for non-variably modified reduction "
998  "items.");
999  return;
 1000  }
 1001  CodeGenFunction::OpaqueValueMapping OpaqueMap(
 1002  CGF,
1003  cast<OpaqueValueExpr>(
1004  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1005  RValue::get(Size));
1006  CGF.EmitVariablyModifiedType(PrivateType);
1007 }
1008 
1009 void ReductionCodeGen::emitInitialization(
 1010  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1011  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1012  assert(SharedAddresses.size() > N && "No variable was generated");
1013  const auto *PrivateVD =
1014  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1015  const OMPDeclareReductionDecl *DRD =
1016  getReductionInit(ClausesData[N].ReductionOp);
1017  QualType PrivateType = PrivateVD->getType();
1018  PrivateAddr = CGF.Builder.CreateElementBitCast(
1019  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1020  QualType SharedType = SharedAddresses[N].first.getType();
1021  SharedLVal = CGF.MakeAddrLValue(
1022  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1023  CGF.ConvertTypeForMem(SharedType)),
1024  SharedType, SharedAddresses[N].first.getBaseInfo(),
1025  CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1026  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1027  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1028  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1029  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1030  PrivateAddr, SharedLVal.getAddress(),
1031  SharedLVal.getType());
1032  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1033  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1034  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1035  PrivateVD->getType().getQualifiers(),
1036  /*IsInitializer=*/false);
1037  }
1038 }
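// Summary of the three paths above: array-typed reduction items are initialized
// element-wise via EmitOMPAggregateInit; scalar items with a usable 'declare
// reduction' initializer go through emitInitWithReductionInitializer; everything
// else falls back to the private copy's own initializer unless DefaultInit
// already produced the value.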
1039 
1040 bool ReductionCodeGen::needCleanups(unsigned N) {
 1041  const auto *PrivateVD =
1042  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1043  QualType PrivateType = PrivateVD->getType();
1044  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1045  return DTorKind != QualType::DK_none;
1046 }
1047 
1048 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
 1049  Address PrivateAddr) {
1050  const auto *PrivateVD =
1051  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1052  QualType PrivateType = PrivateVD->getType();
1053  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1054  if (needCleanups(N)) {
1055  PrivateAddr = CGF.Builder.CreateElementBitCast(
1056  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1057  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1058  }
1059 }
1060 
1061 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
 1062  LValue BaseLV) {
1063  BaseTy = BaseTy.getNonReferenceType();
1064  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1065  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1066  if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1067  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1068  } else {
1069  LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1070  BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1071  }
1072  BaseTy = BaseTy->getPointeeType();
1073  }
 1074  return CGF.MakeAddrLValue(
 1075  CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
 1076  CGF.ConvertTypeForMem(ElTy)),
1077  BaseLV.getType(), BaseLV.getBaseInfo(),
1078  CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1079 }
1080 
1081 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
 1082  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1083  llvm::Value *Addr) {
1084  Address Tmp = Address::invalid();
1085  Address TopTmp = Address::invalid();
1086  Address MostTopTmp = Address::invalid();
1087  BaseTy = BaseTy.getNonReferenceType();
1088  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1089  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1090  Tmp = CGF.CreateMemTemp(BaseTy);
1091  if (TopTmp.isValid())
1092  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1093  else
1094  MostTopTmp = Tmp;
1095  TopTmp = Tmp;
1096  BaseTy = BaseTy->getPointeeType();
1097  }
1098  llvm::Type *Ty = BaseLVType;
1099  if (Tmp.isValid())
1100  Ty = Tmp.getElementType();
1101  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1102  if (Tmp.isValid()) {
1103  CGF.Builder.CreateStore(Addr, Tmp);
1104  return MostTopTmp;
1105  }
1106  return Address(Addr, BaseLVAlignment);
1107 }
1108 
1109 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1110  const VarDecl *OrigVD = nullptr;
1111  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1112  const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1113  while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1114  Base = TempOASE->getBase()->IgnoreParenImpCasts();
1115  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1116  Base = TempASE->getBase()->IgnoreParenImpCasts();
1117  DE = cast<DeclRefExpr>(Base);
1118  OrigVD = cast<VarDecl>(DE->getDecl());
1119  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1120  const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1121  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1122  Base = TempASE->getBase()->IgnoreParenImpCasts();
1123  DE = cast<DeclRefExpr>(Base);
1124  OrigVD = cast<VarDecl>(DE->getDecl());
1125  }
1126  return OrigVD;
1127 }
1128 
1129 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
 1130  Address PrivateAddr) {
1131  const DeclRefExpr *DE;
1132  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1133  BaseDecls.emplace_back(OrigVD);
1134  LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1135  LValue BaseLValue =
1136  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1137  OriginalBaseLValue);
1138  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1139  BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
 1140  llvm::Value *PrivatePointer =
 1141  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
 1142  PrivateAddr.getPointer(),
1143  SharedAddresses[N].first.getAddress().getType());
1144  llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1145  return castToBase(CGF, OrigVD->getType(),
1146  SharedAddresses[N].first.getType(),
1147  OriginalBaseLValue.getAddress().getType(),
1148  OriginalBaseLValue.getAlignment(), Ptr);
1149  }
1150  BaseDecls.emplace_back(
1151  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1152  return PrivateAddr;
1153 }
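// Conceptually (illustrative), for an array-section reduction the address
// returned above is  private_buffer + (original_base - section_begin):  a
// pointer that stands in for the original base declaration, so the user's
// subscript expressions index into the private copy.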
1154 
1155 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
 1156  const OMPDeclareReductionDecl *DRD =
1157  getReductionInit(ClausesData[N].ReductionOp);
1158  return DRD && DRD->getInitializer();
1159 }
1160 
1161 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1162  return CGF.EmitLoadOfPointerLValue(
1163  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1164  getThreadIDVariable()->getType()->castAs<PointerType>());
1165 }
1166 
1167 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1168  if (!CGF.HaveInsertPoint())
1169  return;
1170  // 1.2.2 OpenMP Language Terminology
1171  // Structured block - An executable statement with a single entry at the
1172  // top and a single exit at the bottom.
1173  // The point of exit cannot be a branch out of the structured block.
1174  // longjmp() and throw() must not violate the entry/exit criteria.
1175  CGF.EHStack.pushTerminate();
1176  CodeGen(CGF);
1177  CGF.EHStack.popTerminate();
1178 }
1179 
1180 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1181  CodeGenFunction &CGF) {
1182  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
 1183  getThreadIDVariable()->getType(),
 1184  AlignmentSource::Decl);
 1185 }
1186 
1187 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
 1188  QualType FieldTy) {
1189  auto *Field = FieldDecl::Create(
 1190  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
 1191  C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
 1192  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1193  Field->setAccess(AS_public);
1194  DC->addDecl(Field);
1195  return Field;
1196 }
1197 
1198 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1199  StringRef Separator)
 1200  : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
 1201  OffloadEntriesInfoManager(CGM) {
 1202  ASTContext &C = CGM.getContext();
1203  RecordDecl *RD = C.buildImplicitRecord("ident_t");
1204  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1205  RD->startDefinition();
1206  // reserved_1
1207  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1208  // flags
1209  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1210  // reserved_2
1211  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1212  // reserved_3
1213  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1214  // psource
1215  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1216  RD->completeDefinition();
1217  IdentQTy = C.getRecordType(RD);
1218  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1219  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1220 
 1221  loadOffloadInfoMetadata();
 1222 }
1223 
1224 void CGOpenMPRuntime::clear() {
1225  InternalVars.clear();
1226 }
1227 
1228 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1229  SmallString<128> Buffer;
1230  llvm::raw_svector_ostream OS(Buffer);
1231  StringRef Sep = FirstSeparator;
1232  for (StringRef Part : Parts) {
1233  OS << Sep << Part;
1234  Sep = Separator;
1235  }
1236  return OS.str();
1237 }
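// Example (illustrative, assuming FirstSeparator and Separator are both "."):
//   getName({"omp", "reduction", "reduction_func"}) yields ".omp.reduction.reduction_func".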
1238 
 1239 static llvm::Function *
1240 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
 1241  const Expr *CombinerInitializer, const VarDecl *In,
1242  const VarDecl *Out, bool IsCombiner) {
1243  // void .omp_combiner.(Ty *in, Ty *out);
1244  ASTContext &C = CGM.getContext();
1245  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1246  FunctionArgList Args;
1247  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1248  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1249  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1250  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1251  Args.push_back(&OmpOutParm);
1252  Args.push_back(&OmpInParm);
 1253  const CGFunctionInfo &FnInfo =
 1254  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
 1255  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1256  std::string Name = CGM.getOpenMPRuntime().getName(
 1257  {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
 1258  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
 1259  Name, &CGM.getModule());
1260  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1261  Fn->removeFnAttr(llvm::Attribute::NoInline);
1262  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1263  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1264  CodeGenFunction CGF(CGM);
1265  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1266  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1267  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
 1268  Out->getLocation());
 1269  CodeGenFunction::OMPPrivateScope Scope(CGF);
 1270  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1271  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1272  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1273  .getAddress();
1274  });
1275  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1276  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1277  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1278  .getAddress();
1279  });
1280  (void)Scope.Privatize();
1281  if (!IsCombiner && Out->hasInit() &&
1282  !CGF.isTrivialInitializer(Out->getInit())) {
1283  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1284  Out->getType().getQualifiers(),
1285  /*IsInitializer=*/true);
1286  }
1287  if (CombinerInitializer)
1288  CGF.EmitIgnoredExpr(CombinerInitializer);
1289  Scope.ForceCleanup();
1290  CGF.FinishFunction();
1291  return Fn;
1292 }
1293 
1294 void CGOpenMPRuntime::emitUserDefinedReduction(
 1295  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1296  if (UDRMap.count(D) > 0)
1297  return;
1298  llvm::Function *Combiner = emitCombinerOrInitializer(
1299  CGM, D->getType(), D->getCombiner(),
1300  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1301  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1302  /*IsCombiner=*/true);
1303  llvm::Function *Initializer = nullptr;
1304  if (const Expr *Init = D->getInitializer()) {
1305  Initializer = emitCombinerOrInitializer(
 1306  CGM, D->getType(),
 1307  D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
 1308  : nullptr,
1309  cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1310  cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1311  /*IsCombiner=*/false);
1312  }
1313  UDRMap.try_emplace(D, Combiner, Initializer);
1314  if (CGF) {
1315  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1316  Decls.second.push_back(D);
1317  }
1318 }
1319 
 1320 std::pair<llvm::Function *, llvm::Function *>
1321 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
 1322  auto I = UDRMap.find(D);
1323  if (I != UDRMap.end())
1324  return I->second;
1325  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1326  return UDRMap.lookup(D);
1327 }
1328 
1329 static llvm::Value *emitParallelOrTeamsOutlinedFunction(
 1330  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1331  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1332  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1333  assert(ThreadIDVar->getType()->isPointerType() &&
1334  "thread id variable must be of type kmp_int32 *");
1335  CodeGenFunction CGF(CGM, true);
1336  bool HasCancel = false;
1337  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1338  HasCancel = OPD->hasCancel();
1339  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1340  HasCancel = OPSD->hasCancel();
1341  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1342  HasCancel = OPFD->hasCancel();
1343  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1344  HasCancel = OPFD->hasCancel();
1345  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1346  HasCancel = OPFD->hasCancel();
1347  else if (const auto *OPFD =
1348  dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1349  HasCancel = OPFD->hasCancel();
1350  else if (const auto *OPFD =
1351  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1352  HasCancel = OPFD->hasCancel();
1353  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1354  HasCancel, OutlinedHelperName);
1355  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1356  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1357 }
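// The outlined function produced here follows the kmpc microtask convention
// expected by __kmpc_fork_call/__kmpc_fork_teams, roughly (illustrative):
//   void .omp_outlined.(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                       <captured variables>...);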
1358 
1359 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
 1360  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1361  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
 1362  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
 1363  return emitParallelOrTeamsOutlinedFunction(
 1364  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1365 }
1366 
1367 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
 1368  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1369  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
 1370  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
 1371  return emitParallelOrTeamsOutlinedFunction(
 1372  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1373 }
1374 
1375 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
 1376  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1377  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1378  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1379  bool Tied, unsigned &NumberOfParts) {
1380  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1381  PrePostActionTy &) {
1382  llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1383  llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1384  llvm::Value *TaskArgs[] = {
1385  UpLoc, ThreadID,
1386  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1387  TaskTVar->getType()->castAs<PointerType>())
1388  .getPointer()};
1389  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1390  };
1391  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1392  UntiedCodeGen);
1393  CodeGen.setAction(Action);
1394  assert(!ThreadIDVar->getType()->isPointerType() &&
1395  "thread id variable must be of type kmp_int32 for tasks");
1396  const OpenMPDirectiveKind Region =
1397  isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1398  : OMPD_task;
1399  const CapturedStmt *CS = D.getCapturedStmt(Region);
1400  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1401  CodeGenFunction CGF(CGM, true);
1402  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1403  InnermostKind,
1404  TD ? TD->hasCancel() : false, Action);
1405  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1406  llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS);
1407  if (!Tied)
1408  NumberOfParts = Action.getNumberOfParts();
1409  return Res;
1410 }
1411 
1412 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
 1413  const RecordDecl *RD, const CGRecordLayout &RL,
1414  ArrayRef<llvm::Constant *> Data) {
1415  llvm::StructType *StructTy = RL.getLLVMType();
1416  unsigned PrevIdx = 0;
1417  ConstantInitBuilder CIBuilder(CGM);
1418  auto DI = Data.begin();
1419  for (const FieldDecl *FD : RD->fields()) {
1420  unsigned Idx = RL.getLLVMFieldNo(FD);
1421  // Fill the alignment.
1422  for (unsigned I = PrevIdx; I < Idx; ++I)
1423  Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1424  PrevIdx = Idx + 1;
1425  Fields.add(*DI);
1426  ++DI;
1427  }
1428 }
1429 
1430 template <class... As>
 1431 static llvm::GlobalVariable *
1432 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
 1433  ArrayRef<llvm::Constant *> Data, const Twine &Name,
1434  As &&... Args) {
1435  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1436  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1437  ConstantInitBuilder CIBuilder(CGM);
1438  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1439  buildStructValue(Fields, CGM, RD, RL, Data);
1440  return Fields.finishAndCreateGlobal(
1441  Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1442  std::forward<As>(Args)...);
1443 }
1444 
1445 template <typename T>
 1446 static void
1447 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
 1448  ArrayRef<llvm::Constant *> Data,
1449  T &Parent) {
1450  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1451  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1452  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1453  buildStructValue(Fields, CGM, RD, RL, Data);
1454  Fields.finishAndAddTo(Parent);
1455 }
1456 
1457 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1458  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1459  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
1460  if (!Entry) {
1461  if (!DefaultOpenMPPSource) {
1462  // Initialize default location for psource field of ident_t structure of
1463  // all ident_t objects. Format is ";file;function;line;column;;".
1464  // Taken from
1465  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
1466  DefaultOpenMPPSource =
1467  CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1468  DefaultOpenMPPSource =
1469  llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1470  }
1471 
1472  llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1473  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1474  llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1475  llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1476  DefaultOpenMPPSource};
1477  llvm::GlobalValue *DefaultOpenMPLocation =
1478  createGlobalStruct(CGM, IdentQTy, /*IsConstant=*/false, Data, "",
1479  llvm::GlobalValue::PrivateLinkage);
1480  DefaultOpenMPLocation->setUnnamedAddr(
1481  llvm::GlobalValue::UnnamedAddr::Global);
1482 
1483  OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
1484  }
1485  return Address(Entry, Align);
1486 }
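// A default ident_t produced by the code above looks roughly like (illustrative):
//   { i32 0, i32 <Flags>, i32 0, i32 0, i8* ";unknown;unknown;0;0;;" }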
1487 
1488 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
 1489  SourceLocation Loc,
1490  unsigned Flags) {
1491  Flags |= OMP_IDENT_KMPC;
1492  // If no debug info is generated - return global default location.
1493  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1494  Loc.isInvalid())
1495  return getOrCreateDefaultLocation(Flags).getPointer();
1496 
1497  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1498 
1499  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1500  Address LocValue = Address::invalid();
1501  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1502  if (I != OpenMPLocThreadIDMap.end())
1503  LocValue = Address(I->second.DebugLoc, Align);
1504 
1505  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1506  // GetOpenMPThreadID was called before this routine.
1507  if (!LocValue.isValid()) {
1508  // Generate "ident_t .kmpc_loc.addr;"
1509  Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1510  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1511  Elem.second.DebugLoc = AI.getPointer();
1512  LocValue = AI;
1513 
1514  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1515  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1516  CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1517  CGF.getTypeSize(IdentQTy));
1518  }
1519 
1520  // char **psource = &.kmpc_loc_<flags>.addr.psource;
1521  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1522  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1523  LValue PSource =
1524  CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1525 
1526  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1527  if (OMPDebugLoc == nullptr) {
1528  SmallString<128> Buffer2;
1529  llvm::raw_svector_ostream OS2(Buffer2);
1530  // Build debug location
1531  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1532  OS2 << ";" << PLoc.getFilename() << ";";
1533  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1534  OS2 << FD->getQualifiedNameAsString();
1535  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1536  OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1537  OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1538  }
1539  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1540  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1541 
1542  // Our callers always pass this to a runtime function, so for
1543  // convenience, go ahead and return a naked pointer.
1544  return LocValue.getPointer();
1545 }
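The psource string written above follows the ";file;function;line;column;;" convention; for a directive at line 42, column 9 of foo.c inside bar(), the stored string would be ";foo.c;bar;42;9;;". A self-contained sketch of the same formatting (helper name and values are illustrative, not part of this file):

    #include <sstream>
    #include <string>
    std::string makePSource(const std::string &File, const std::string &Func,
                            unsigned Line, unsigned Col) {
      std::ostringstream OS;
      OS << ";" << File << ";" << Func << ";" << Line << ";" << Col << ";;";
      return OS.str(); // e.g. ";foo.c;bar;42;9;;"
    }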
1546 
1547 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1548  SourceLocation Loc) {
1549  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1550 
1551  llvm::Value *ThreadID = nullptr;
1552  // Check whether we've already cached a load of the thread id in this
1553  // function.
1554  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1555  if (I != OpenMPLocThreadIDMap.end()) {
1556  ThreadID = I->second.ThreadID;
1557  if (ThreadID != nullptr)
1558  return ThreadID;
1559  }
1560  // If exceptions are enabled, do not use the parameter, to avoid a possible crash.
1561  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1562  !CGF.getLangOpts().CXXExceptions ||
1563  CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1564  if (auto *OMPRegionInfo =
1565  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1566  if (OMPRegionInfo->getThreadIDVariable()) {
1567  // Check if this is an outlined function with the thread id passed as an argument.
1568  LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1569  ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1570  // If the value was loaded in the entry block, cache it and use it everywhere
1571  // in the function.
1572  if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1573  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1574  Elem.second.ThreadID = ThreadID;
1575  }
1576  return ThreadID;
1577  }
1578  }
1579  }
1580 
1581  // This is not an outlined function region - need to call kmp_int32
1582  // __kmpc_global_thread_num(ident_t *loc).
1583  // Generate thread id value and cache this value for use across the
1584  // function.
1585  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1586  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1587  llvm::CallInst *Call = CGF.Builder.CreateCall(
1588  createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1589  emitUpdateLocation(CGF, Loc));
1590  Call->setCallingConv(CGF.getRuntimeCC());
1591  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1592  Elem.second.ThreadID = Call;
1593  return Call;
1594 }
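The practical effect of the fallback path is that plain serial code gets a single hoisted runtime call per function whose result is reused; a rough C-level picture (a sketch of the generated code, not literal output):

    // kmp_int32 gtid = __kmpc_global_thread_num(&loc);  // emitted once, at the alloca insert point
    // ...later uses of the thread id in this function reuse 'gtid' via OpenMPLocThreadIDMap.
    // Inside an outlined parallel/task region the id is instead loaded from the gtid parameter.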
1595 
1596 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1597  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1598  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1599  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1600  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1601  for(auto *D : FunctionUDRMap[CGF.CurFn])
1602  UDRMap.erase(D);
1603  FunctionUDRMap.erase(CGF.CurFn);
1604  }
1605 }
1606 
1607 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1608  return IdentTy->getPointerTo();
1609 }
1610 
1611 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1612  if (!Kmpc_MicroTy) {
1613  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1614  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1615  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1616  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1617  }
1618  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1619 }
1620 
1621 llvm::Constant *
1622 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1623  llvm::Constant *RTLFn = nullptr;
1624  switch (static_cast<OpenMPRTLFunction>(Function)) {
1625  case OMPRTL__kmpc_fork_call: {
1626  // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1627  // microtask, ...);
1628  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1629  getKmpc_MicroPointerTy()};
1630  auto *FnTy =
1631  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1632  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1633  break;
1634  }
1635  case OMPRTL__kmpc_global_thread_num: {
1636  // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1637  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1638  auto *FnTy =
1639  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1640  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1641  break;
1642  }
1643  case OMPRTL__kmpc_threadprivate_cached: {
1644  // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1645  // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1646  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1647  CGM.VoidPtrTy, CGM.SizeTy,
1648  CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1649  auto *FnTy =
1650  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1651  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1652  break;
1653  }
1654  case OMPRTL__kmpc_critical: {
1655  // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1656  // kmp_critical_name *crit);
1657  llvm::Type *TypeParams[] = {
1658  getIdentTyPointerTy(), CGM.Int32Ty,
1659  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1660  auto *FnTy =
1661  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1662  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1663  break;
1664  }
1665  case OMPRTL__kmpc_critical_with_hint: {
1666  // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1667  // kmp_critical_name *crit, uintptr_t hint);
1668  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1669  llvm::PointerType::getUnqual(KmpCriticalNameTy),
1670  CGM.IntPtrTy};
1671  auto *FnTy =
1672  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1673  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1674  break;
1675  }
1676  case OMPRTL__kmpc_threadprivate_register: {
1677  // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1678  // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1679  // typedef void *(*kmpc_ctor)(void *);
1680  auto *KmpcCtorTy =
1681  llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1682  /*isVarArg*/ false)->getPointerTo();
1683  // typedef void *(*kmpc_cctor)(void *, void *);
1684  llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1685  auto *KmpcCopyCtorTy =
1686  llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1687  /*isVarArg*/ false)
1688  ->getPointerTo();
1689  // typedef void (*kmpc_dtor)(void *);
1690  auto *KmpcDtorTy =
1691  llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1692  ->getPointerTo();
1693  llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1694  KmpcCopyCtorTy, KmpcDtorTy};
1695  auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1696  /*isVarArg*/ false);
1697  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1698  break;
1699  }
1700  case OMPRTL__kmpc_end_critical: {
1701  // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1702  // kmp_critical_name *crit);
1703  llvm::Type *TypeParams[] = {
1704  getIdentTyPointerTy(), CGM.Int32Ty,
1705  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1706  auto *FnTy =
1707  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1708  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1709  break;
1710  }
1711  case OMPRTL__kmpc_cancel_barrier: {
1712  // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1713  // global_tid);
1714  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1715  auto *FnTy =
1716  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1717  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1718  break;
1719  }
1720  case OMPRTL__kmpc_barrier: {
1721  // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1722  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1723  auto *FnTy =
1724  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1725  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1726  break;
1727  }
1728  case OMPRTL__kmpc_for_static_fini: {
1729  // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1730  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1731  auto *FnTy =
1732  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1733  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1734  break;
1735  }
1736  case OMPRTL__kmpc_push_num_threads: {
1737  // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1738  // kmp_int32 num_threads)
1739  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1740  CGM.Int32Ty};
1741  auto *FnTy =
1742  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1743  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1744  break;
1745  }
1746  case OMPRTL__kmpc_serialized_parallel: {
1747  // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1748  // global_tid);
1749  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1750  auto *FnTy =
1751  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1752  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1753  break;
1754  }
1755  case OMPRTL__kmpc_end_serialized_parallel: {
1756  // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1757  // global_tid);
1758  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1759  auto *FnTy =
1760  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1761  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1762  break;
1763  }
1764  case OMPRTL__kmpc_flush: {
1765  // Build void __kmpc_flush(ident_t *loc);
1766  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1767  auto *FnTy =
1768  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1769  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1770  break;
1771  }
1772  case OMPRTL__kmpc_master: {
1773  // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1774  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1775  auto *FnTy =
1776  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1777  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1778  break;
1779  }
1780  case OMPRTL__kmpc_end_master: {
1781  // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1782  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1783  auto *FnTy =
1784  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1785  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1786  break;
1787  }
1788  case OMPRTL__kmpc_omp_taskyield: {
1789  // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1790  // int end_part);
1791  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1792  auto *FnTy =
1793  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1794  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1795  break;
1796  }
1797  case OMPRTL__kmpc_single: {
1798  // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1799  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1800  auto *FnTy =
1801  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1802  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1803  break;
1804  }
1805  case OMPRTL__kmpc_end_single: {
1806  // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1807  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1808  auto *FnTy =
1809  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1810  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1811  break;
1812  }
1813  case OMPRTL__kmpc_omp_task_alloc: {
1814  // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1815  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1816  // kmp_routine_entry_t *task_entry);
1817  assert(KmpRoutineEntryPtrTy != nullptr &&
1818  "Type kmp_routine_entry_t must be created.");
1819  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1820  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1821  // Return void * and then cast to particular kmp_task_t type.
1822  auto *FnTy =
1823  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1824  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1825  break;
1826  }
1827  case OMPRTL__kmpc_omp_task: {
1828  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1829  // *new_task);
1830  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1831  CGM.VoidPtrTy};
1832  auto *FnTy =
1833  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1834  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1835  break;
1836  }
1837  case OMPRTL__kmpc_copyprivate: {
1838  // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1839  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1840  // kmp_int32 didit);
1841  llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1842  auto *CpyFnTy =
1843  llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1844  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1845  CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1846  CGM.Int32Ty};
1847  auto *FnTy =
1848  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1849  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1850  break;
1851  }
1852  case OMPRTL__kmpc_reduce: {
1853  // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1854  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1855  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1856  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1857  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1858  /*isVarArg=*/false);
1859  llvm::Type *TypeParams[] = {
1861  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1862  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1863  auto *FnTy =
1864  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1865  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1866  break;
1867  }
1868  case OMPRTL__kmpc_reduce_nowait: {
1869  // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1870  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1871  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1872  // *lck);
1873  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1874  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1875  /*isVarArg=*/false);
1876  llvm::Type *TypeParams[] = {
1877  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1878  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1879  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1880  auto *FnTy =
1881  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1882  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1883  break;
1884  }
1885  case OMPRTL__kmpc_end_reduce: {
1886  // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1887  // kmp_critical_name *lck);
1888  llvm::Type *TypeParams[] = {
1889  getIdentTyPointerTy(), CGM.Int32Ty,
1890  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1891  auto *FnTy =
1892  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1893  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1894  break;
1895  }
1896  case OMPRTL__kmpc_end_reduce_nowait: {
1897  // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1898  // kmp_critical_name *lck);
1899  llvm::Type *TypeParams[] = {
1900  getIdentTyPointerTy(), CGM.Int32Ty,
1901  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1902  auto *FnTy =
1903  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1904  RTLFn =
1905  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1906  break;
1907  }
1908  case OMPRTL__kmpc_omp_task_begin_if0: {
1909  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
1910  // kmp_task_t *new_task);
1911  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1912  CGM.VoidPtrTy};
1913  auto *FnTy =
1914  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1915  RTLFn =
1916  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1917  break;
1918  }
1919  case OMPRTL__kmpc_omp_task_complete_if0: {
1920  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
1921  // kmp_task_t *new_task);
1922  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1923  CGM.VoidPtrTy};
1924  auto *FnTy =
1925  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1926  RTLFn = CGM.CreateRuntimeFunction(FnTy,
1927  /*Name=*/"__kmpc_omp_task_complete_if0");
1928  break;
1929  }
1930  case OMPRTL__kmpc_ordered: {
1931  // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1932  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1933  auto *FnTy =
1934  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1935  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1936  break;
1937  }
1938  case OMPRTL__kmpc_end_ordered: {
1939  // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1940  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1941  auto *FnTy =
1942  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1943  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1944  break;
1945  }
1946  case OMPRTL__kmpc_omp_taskwait: {
1947  // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1948  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1949  auto *FnTy =
1950  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1951  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1952  break;
1953  }
1954  case OMPRTL__kmpc_taskgroup: {
1955  // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1956  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1957  auto *FnTy =
1958  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1959  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1960  break;
1961  }
1962  case OMPRTL__kmpc_end_taskgroup: {
1963  // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1964  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1965  auto *FnTy =
1966  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1967  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1968  break;
1969  }
1970  case OMPRTL__kmpc_push_proc_bind: {
1971  // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1972  // int proc_bind)
1973  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1974  auto *FnTy =
1975  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1976  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1977  break;
1978  }
1979  case OMPRTL__kmpc_omp_task_with_deps: {
1980  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1981  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1982  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1983  llvm::Type *TypeParams[] = {
1984  getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1985  CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
1986  auto *FnTy =
1987  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1988  RTLFn =
1989  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1990  break;
1991  }
1992  case OMPRTL__kmpc_omp_wait_deps: {
1993  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1994  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1995  // kmp_depend_info_t *noalias_dep_list);
1996  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1997  CGM.Int32Ty, CGM.VoidPtrTy,
1998  CGM.Int32Ty, CGM.VoidPtrTy};
1999  auto *FnTy =
2000  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2001  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2002  break;
2003  }
2004  case OMPRTL__kmpc_cancellationpoint: {
2005  // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2006  // global_tid, kmp_int32 cncl_kind)
2007  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2008  auto *FnTy =
2009  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2010  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2011  break;
2012  }
2013  case OMPRTL__kmpc_cancel: {
2014  // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2015  // kmp_int32 cncl_kind)
2016  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2017  auto *FnTy =
2018  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2019  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2020  break;
2021  }
2022  case OMPRTL__kmpc_push_num_teams: {
2023  // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
2024  // kmp_int32 num_teams, kmp_int32 num_threads)
2025  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2026  CGM.Int32Ty};
2027  auto *FnTy =
2028  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2029  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2030  break;
2031  }
2032  case OMPRTL__kmpc_fork_teams: {
2033  // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2034  // microtask, ...);
2035  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2036  getKmpc_MicroPointerTy()};
2037  auto *FnTy =
2038  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2039  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2040  break;
2041  }
2042  case OMPRTL__kmpc_taskloop: {
2043  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2044  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2045  // sched, kmp_uint64 grainsize, void *task_dup);
2046  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2047  CGM.IntTy,
2048  CGM.VoidPtrTy,
2049  CGM.IntTy,
2050  CGM.Int64Ty->getPointerTo(),
2051  CGM.Int64Ty->getPointerTo(),
2052  CGM.Int64Ty,
2053  CGM.IntTy,
2054  CGM.IntTy,
2055  CGM.Int64Ty,
2056  CGM.VoidPtrTy};
2057  auto *FnTy =
2058  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2059  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2060  break;
2061  }
2063  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2064  // num_dims, struct kmp_dim *dims);
2065  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2066  CGM.Int32Ty,
2067  CGM.Int32Ty,
2068  CGM.VoidPtrTy};
2069  auto *FnTy =
2070  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2071  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2072  break;
2073  }
2074  case OMPRTL__kmpc_doacross_fini: {
2075  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2076  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2077  auto *FnTy =
2078  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2079  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2080  break;
2081  }
2082  case OMPRTL__kmpc_doacross_post: {
2083  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2084  // *vec);
2085  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2086  CGM.Int64Ty->getPointerTo()};
2087  auto *FnTy =
2088  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2089  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2090  break;
2091  }
2092  case OMPRTL__kmpc_doacross_wait: {
2093  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2094  // *vec);
2095  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2096  CGM.Int64Ty->getPointerTo()};
2097  auto *FnTy =
2098  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2099  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2100  break;
2101  }
2102  case OMPRTL__kmpc_task_reduction_init: {
2103  // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2104  // *data);
2105  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2106  auto *FnTy =
2107  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2108  RTLFn =
2109  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2110  break;
2111  }
2112  case OMPRTL__kmpc_task_reduction_get_th_data: {
2113  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2114  // *d);
2115  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2116  auto *FnTy =
2117  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2118  RTLFn = CGM.CreateRuntimeFunction(
2119  FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2120  break;
2121  }
2122  case OMPRTL__tgt_target: {
2123  // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2124  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2125  // *arg_types);
2126  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2127  CGM.VoidPtrTy,
2128  CGM.Int32Ty,
2129  CGM.VoidPtrPtrTy,
2130  CGM.VoidPtrPtrTy,
2131  CGM.SizeTy->getPointerTo(),
2132  CGM.Int64Ty->getPointerTo()};
2133  auto *FnTy =
2134  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2135  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2136  break;
2137  }
2138  case OMPRTL__tgt_target_nowait: {
2139  // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2140  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2141  // int64_t *arg_types);
2142  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2143  CGM.VoidPtrTy,
2144  CGM.Int32Ty,
2145  CGM.VoidPtrPtrTy,
2146  CGM.VoidPtrPtrTy,
2147  CGM.SizeTy->getPointerTo(),
2148  CGM.Int64Ty->getPointerTo()};
2149  auto *FnTy =
2150  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2151  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2152  break;
2153  }
2154  case OMPRTL__tgt_target_teams: {
2155  // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2156  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2157  // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2158  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2159  CGM.VoidPtrTy,
2160  CGM.Int32Ty,
2161  CGM.VoidPtrPtrTy,
2162  CGM.VoidPtrPtrTy,
2163  CGM.SizeTy->getPointerTo(),
2164  CGM.Int64Ty->getPointerTo(),
2165  CGM.Int32Ty,
2166  CGM.Int32Ty};
2167  auto *FnTy =
2168  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2169  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2170  break;
2171  }
2172  case OMPRTL__tgt_target_teams_nowait: {
2173  // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2174  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2175  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2176  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2177  CGM.VoidPtrTy,
2178  CGM.Int32Ty,
2179  CGM.VoidPtrPtrTy,
2180  CGM.VoidPtrPtrTy,
2181  CGM.SizeTy->getPointerTo(),
2182  CGM.Int64Ty->getPointerTo(),
2183  CGM.Int32Ty,
2184  CGM.Int32Ty};
2185  auto *FnTy =
2186  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2187  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2188  break;
2189  }
2190  case OMPRTL__tgt_register_lib: {
2191  // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2192  QualType ParamTy =
2193  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2194  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2195  auto *FnTy =
2196  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2197  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2198  break;
2199  }
2200  case OMPRTL__tgt_unregister_lib: {
2201  // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2202  QualType ParamTy =
2203  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2204  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2205  auto *FnTy =
2206  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2207  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2208  break;
2209  }
2210  case OMPRTL__tgt_target_data_begin: {
2211  // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2212  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2213  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2214  CGM.Int32Ty,
2215  CGM.VoidPtrPtrTy,
2216  CGM.VoidPtrPtrTy,
2217  CGM.SizeTy->getPointerTo(),
2218  CGM.Int64Ty->getPointerTo()};
2219  auto *FnTy =
2220  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2221  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2222  break;
2223  }
2224  case OMPRTL__tgt_target_data_begin_nowait: {
2225  // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2226  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2227  // *arg_types);
2228  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2229  CGM.Int32Ty,
2230  CGM.VoidPtrPtrTy,
2231  CGM.VoidPtrPtrTy,
2232  CGM.SizeTy->getPointerTo(),
2233  CGM.Int64Ty->getPointerTo()};
2234  auto *FnTy =
2235  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2236  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2237  break;
2238  }
2239  case OMPRTL__tgt_target_data_end: {
2240  // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2241  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2242  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2243  CGM.Int32Ty,
2244  CGM.VoidPtrPtrTy,
2245  CGM.VoidPtrPtrTy,
2246  CGM.SizeTy->getPointerTo(),
2247  CGM.Int64Ty->getPointerTo()};
2248  auto *FnTy =
2249  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2250  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2251  break;
2252  }
2253  case OMPRTL__tgt_target_data_end_nowait: {
2254  // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2255  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2256  // *arg_types);
2257  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2258  CGM.Int32Ty,
2259  CGM.VoidPtrPtrTy,
2260  CGM.VoidPtrPtrTy,
2261  CGM.SizeTy->getPointerTo(),
2262  CGM.Int64Ty->getPointerTo()};
2263  auto *FnTy =
2264  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2265  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2266  break;
2267  }
2268  case OMPRTL__tgt_target_data_update: {
2269  // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2270  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2271  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2272  CGM.Int32Ty,
2273  CGM.VoidPtrPtrTy,
2274  CGM.VoidPtrPtrTy,
2275  CGM.SizeTy->getPointerTo(),
2276  CGM.Int64Ty->getPointerTo()};
2277  auto *FnTy =
2278  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2279  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2280  break;
2281  }
2282  case OMPRTL__tgt_target_data_update_nowait: {
2283  // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2284  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2285  // *arg_types);
2286  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2287  CGM.Int32Ty,
2288  CGM.VoidPtrPtrTy,
2289  CGM.VoidPtrPtrTy,
2290  CGM.SizeTy->getPointerTo(),
2291  CGM.Int64Ty->getPointerTo()};
2292  auto *FnTy =
2293  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2294  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2295  break;
2296  }
2297  }
2298  assert(RTLFn && "Unable to find OpenMP runtime function");
2299  return RTLFn;
2300 }
2301 
2302 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2303  bool IVSigned) {
2304  assert((IVSize == 32 || IVSize == 64) &&
2305  "IV size is not compatible with the omp runtime");
2306  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2307  : "__kmpc_for_static_init_4u")
2308  : (IVSigned ? "__kmpc_for_static_init_8"
2309  : "__kmpc_for_static_init_8u");
2310  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2311  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2312  llvm::Type *TypeParams[] = {
2313  getIdentTyPointerTy(), // loc
2314  CGM.Int32Ty, // tid
2315  CGM.Int32Ty, // schedtype
2316  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2317  PtrTy, // p_lower
2318  PtrTy, // p_upper
2319  PtrTy, // p_stride
2320  ITy, // incr
2321  ITy // chunk
2322  };
2323  auto *FnTy =
2324  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2325  return CGM.CreateRuntimeFunction(FnTy, Name);
2326 }
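The runtime entry point selected here depends only on the induction-variable width and signedness; for IVSize == 32 and IVSigned == true the declaration built above corresponds to the following C-level prototype (a sketch derived from the TypeParams array, not a quote from the runtime headers):

    // void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
    //                               kmp_int32 *plastiter, kmp_int32 *plower, kmp_int32 *pupper,
    //                               kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk);
    // The _4u/_8/_8u variants differ only in the integer type used for the induction variable.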
2327 
2328 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2329  bool IVSigned) {
2330  assert((IVSize == 32 || IVSize == 64) &&
2331  "IV size is not compatible with the omp runtime");
2332  StringRef Name =
2333  IVSize == 32
2334  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2335  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2336  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2337  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2338  CGM.Int32Ty, // tid
2339  CGM.Int32Ty, // schedtype
2340  ITy, // lower
2341  ITy, // upper
2342  ITy, // stride
2343  ITy // chunk
2344  };
2345  auto *FnTy =
2346  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2347  return CGM.CreateRuntimeFunction(FnTy, Name);
2348 }
2349 
2350 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2351  bool IVSigned) {
2352  assert((IVSize == 32 || IVSize == 64) &&
2353  "IV size is not compatible with the omp runtime");
2354  StringRef Name =
2355  IVSize == 32
2356  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2357  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2358  llvm::Type *TypeParams[] = {
2359  getIdentTyPointerTy(), // loc
2360  CGM.Int32Ty, // tid
2361  };
2362  auto *FnTy =
2363  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2364  return CGM.CreateRuntimeFunction(FnTy, Name);
2365 }
2366 
2367 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2368  bool IVSigned) {
2369  assert((IVSize == 32 || IVSize == 64) &&
2370  "IV size is not compatible with the omp runtime");
2371  StringRef Name =
2372  IVSize == 32
2373  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2374  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2375  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2376  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2377  llvm::Type *TypeParams[] = {
2378  getIdentTyPointerTy(), // loc
2379  CGM.Int32Ty, // tid
2380  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2381  PtrTy, // p_lower
2382  PtrTy, // p_upper
2383  PtrTy // p_stride
2384  };
2385  auto *FnTy =
2386  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2387  return CGM.CreateRuntimeFunction(FnTy, Name);
2388 }
2389 
2390 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
2391  if (CGM.getLangOpts().OpenMPSimd)
2392  return Address::invalid();
2393  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2394  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2395  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
2396  SmallString<64> PtrName;
2397  {
2398  llvm::raw_svector_ostream OS(PtrName);
2399  OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
2400  }
2401  llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2402  if (!Ptr) {
2403  QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2404  Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2405  PtrName);
2406  if (!CGM.getLangOpts().OpenMPIsDevice) {
2407  auto *GV = cast<llvm::GlobalVariable>(Ptr);
2408  GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2409  GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2410  }
2411  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2412  registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2413  }
2414  return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2415  }
2416  return Address::invalid();
2417 }
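A rough picture of the indirection this sets up for "#pragma omp declare target link(gbl)", with names abbreviated (the real pointer carries the mangled name plus the _decl_tgt_link_ptr suffix built above); this is a sketch, not emitted source:

    // Host:   T gbl;  T *gbl_decl_tgt_link_ptr = &gbl;  // external linkage, kept alive via llvm.used
    // Device: T *gbl_decl_tgt_link_ptr;                 // the runtime points it at the mapped copy
    // Device-side references to 'gbl' are issued through *gbl_decl_tgt_link_ptr.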
2418 
2419 llvm::Constant *
2420 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2421  assert(!CGM.getLangOpts().OpenMPUseTLS ||
2422  !CGM.getContext().getTargetInfo().isTLSSupported());
2423  // Lookup the entry, lazily creating it if necessary.
2424  std::string Suffix = getName({"cache", ""});
2425  return getOrCreateInternalVariable(
2426  CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2427 }
2428 
2429 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2430  const VarDecl *VD,
2431  Address VDAddr,
2432  SourceLocation Loc) {
2433  if (CGM.getLangOpts().OpenMPUseTLS &&
2434  CGM.getContext().getTargetInfo().isTLSSupported())
2435  return VDAddr;
2436 
2437  llvm::Type *VarTy = VDAddr.getElementType();
2438  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2439  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2440  CGM.Int8PtrTy),
2441  CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2442  getOrCreateThreadPrivateCache(VD)};
2443  return Address(CGF.EmitRuntimeCall(
2444  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2445  VDAddr.getAlignment());
2446 }
2447 
2448 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2449  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2450  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2451  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2452  // library.
2453  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2454  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2455  OMPLoc);
2456  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2457  // to register constructor/destructor for variable.
2458  llvm::Value *Args[] = {
2459  OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2460  Ctor, CopyCtor, Dtor};
2461  CGF.EmitRuntimeCall(
2462  createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2463 }
2464 
2465 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2466  const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2467  bool PerformInit, CodeGenFunction *CGF) {
2468  if (CGM.getLangOpts().OpenMPUseTLS &&
2469  CGM.getContext().getTargetInfo().isTLSSupported())
2470  return nullptr;
2471 
2472  VD = VD->getDefinition(CGM.getContext());
2473  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
2474  ThreadPrivateWithDefinition.insert(VD);
2475  QualType ASTTy = VD->getType();
2476 
2477  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2478  const Expr *Init = VD->getAnyInitializer();
2479  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2480  // Generate function that re-emits the declaration's initializer into the
2481  // threadprivate copy of the variable VD
2482  CodeGenFunction CtorCGF(CGM);
2483  FunctionArgList Args;
2484  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2485  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2486  ImplicitParamDecl::Other);
2487  Args.push_back(&Dst);
2488 
2489  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2490  CGM.getContext().VoidPtrTy, Args);
2491  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2492  std::string Name = getName({"__kmpc_global_ctor_", ""});
2493  llvm::Function *Fn =
2494  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2495  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2496  Args, Loc, Loc);
2497  llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2498  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2499  CGM.getContext().VoidPtrTy, Dst.getLocation());
2500  Address Arg = Address(ArgVal, VDAddr.getAlignment());
2501  Arg = CtorCGF.Builder.CreateElementBitCast(
2502  Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2503  CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2504  /*IsInitializer=*/true);
2505  ArgVal = CtorCGF.EmitLoadOfScalar(
2506  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2507  CGM.getContext().VoidPtrTy, Dst.getLocation());
2508  CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2509  CtorCGF.FinishFunction();
2510  Ctor = Fn;
2511  }
2512  if (VD->getType().isDestructedType() != QualType::DK_none) {
2513  // Generate function that emits destructor call for the threadprivate copy
2514  // of the variable VD
2515  CodeGenFunction DtorCGF(CGM);
2516  FunctionArgList Args;
2517  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2518  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2519  ImplicitParamDecl::Other);
2520  Args.push_back(&Dst);
2521 
2522  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2523  CGM.getContext().VoidTy, Args);
2524  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2525  std::string Name = getName({"__kmpc_global_dtor_", ""});
2526  llvm::Function *Fn =
2527  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2528  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2529  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2530  Loc, Loc);
2531  // Create a scope with an artificial location for the body of this function.
2532  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2533  llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2534  DtorCGF.GetAddrOfLocalVar(&Dst),
2535  /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2536  DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2537  DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2538  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2539  DtorCGF.FinishFunction();
2540  Dtor = Fn;
2541  }
2542  // Do not emit init function if it is not required.
2543  if (!Ctor && !Dtor)
2544  return nullptr;
2545 
2546  llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2547  auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2548  /*isVarArg=*/false)
2549  ->getPointerTo();
2550  // Copying constructor for the threadprivate variable.
2551  // Must be NULL: the parameter is reserved by the runtime, which currently
2552  // requires it to always be NULL and asserts otherwise.
2553  CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2554  if (Ctor == nullptr) {
2555  auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2556  /*isVarArg=*/false)
2557  ->getPointerTo();
2558  Ctor = llvm::Constant::getNullValue(CtorTy);
2559  }
2560  if (Dtor == nullptr) {
2561  auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2562  /*isVarArg=*/false)
2563  ->getPointerTo();
2564  Dtor = llvm::Constant::getNullValue(DtorTy);
2565  }
2566  if (!CGF) {
2567  auto *InitFunctionTy =
2568  llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2569  std::string Name = getName({"__omp_threadprivate_init_", ""});
2570  llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2571  InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2572  CodeGenFunction InitCGF(CGM);
2573  FunctionArgList ArgList;
2574  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2575  CGM.getTypes().arrangeNullaryFunction(), ArgList,
2576  Loc, Loc);
2577  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2578  InitCGF.FinishFunction();
2579  return InitFunction;
2580  }
2581  emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2582  }
2583  return nullptr;
2584 }
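For a C++ threadprivate variable with a non-trivial initializer or destructor, the helper produced here (when no CGF is supplied) behaves roughly like the following pseudo-C; __omp_threadprivate_init_, ctor and dtor stand for the generated internal functions and are illustrative names:

    // void __omp_threadprivate_init_(void) {
    //   __kmpc_global_thread_num(&loc);                              // force runtime initialization
    //   __kmpc_threadprivate_register(&loc, &var, ctor, /*cctor=*/0, dtor);
    // }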
2585 
2586 /// Obtain information that uniquely identifies a target entry. This
2587 /// consists of the file and device IDs as well as line number associated with
2588 /// the relevant entry source location.
2589 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2590  unsigned &DeviceID, unsigned &FileID,
2591  unsigned &LineNum) {
2592  SourceManager &SM = C.getSourceManager();
2593 
2594  // The Loc should always be valid and have a file ID (the user cannot use
2595  // #pragma directives in macros).
2596 
2597  assert(Loc.isValid() && "Source location is expected to be always valid.");
2598 
2599  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2600  assert(PLoc.isValid() && "Source location is expected to be always valid.");
2601 
2602  llvm::sys::fs::UniqueID ID;
2603  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2604  SM.getDiagnostics().Report(diag::err_cannot_open_file)
2605  << PLoc.getFilename() << EC.message();
2606 
2607  DeviceID = ID.getDevice();
2608  FileID = ID.getFile();
2609  LineNum = PLoc.getLine();
2610 }
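DeviceID and FileID are simply the two halves of the file's llvm::sys::fs::UniqueID (device and inode numbers), and LineNum comes from the presumed location. A minimal stand-alone use of the same API, with an illustrative path and without the diagnostic handling above:

    #include "llvm/Support/FileSystem.h"
    static void getFileUniqueInfo(unsigned &DeviceID, unsigned &FileID) {
      llvm::sys::fs::UniqueID ID;
      if (llvm::sys::fs::getUniqueID("/tmp/foo.c", ID)) // path is illustrative
        return; // the code above reports a diagnostic instead of returning silently
      DeviceID = ID.getDevice();
      FileID = ID.getFile();
    }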
2611 
2612 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2613  llvm::GlobalVariable *Addr,
2614  bool PerformInit) {
2615  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2616  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2617  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
2618  return CGM.getLangOpts().OpenMPIsDevice;
2619  VD = VD->getDefinition(CGM.getContext());
2620  if (VD && !DeclareTargetWithDefinition.insert(VD).second)
2621  return CGM.getLangOpts().OpenMPIsDevice;
2622 
2623  QualType ASTTy = VD->getType();
2624 
2625  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
2626  // Produce the unique prefix to identify the new target regions. We use
2627  // the source location of the variable declaration which we know to not
2628  // conflict with any target region.
2629  unsigned DeviceID;
2630  unsigned FileID;
2631  unsigned Line;
2632  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2633  SmallString<128> Buffer, Out;
2634  {
2635  llvm::raw_svector_ostream OS(Buffer);
2636  OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2637  << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2638  }
2639 
2640  const Expr *Init = VD->getAnyInitializer();
2641  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2642  llvm::Constant *Ctor;
2643  llvm::Constant *ID;
2644  if (CGM.getLangOpts().OpenMPIsDevice) {
2645  // Generate function that re-emits the declaration's initializer into
2646  // the threadprivate copy of the variable VD
2647  CodeGenFunction CtorCGF(CGM);
2648 
2649  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
2650  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2651  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2652  FTy, Twine(Buffer, "_ctor"), FI, Loc);
2653  auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2654  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2655  FunctionArgList(), Loc, Loc);
2656  auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2657  CtorCGF.EmitAnyExprToMem(Init,
2658  Address(Addr, CGM.getContext().getDeclAlign(VD)),
2659  Init->getType().getQualifiers(),
2660  /*IsInitializer=*/true);
2661  CtorCGF.FinishFunction();
2662  Ctor = Fn;
2663  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2664  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2665  } else {
2666  Ctor = new llvm::GlobalVariable(
2667  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2668  llvm::GlobalValue::PrivateLinkage,
2669  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2670  ID = Ctor;
2671  }
2672 
2673  // Register the information for the entry associated with the constructor.
2674  Out.clear();
2675  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2676  DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2677  ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2678  }
2679  if (VD->getType().isDestructedType() != QualType::DK_none) {
2680  llvm::Constant *Dtor;
2681  llvm::Constant *ID;
2682  if (CGM.getLangOpts().OpenMPIsDevice) {
2683  // Generate function that emits destructor call for the threadprivate
2684  // copy of the variable VD
2685  CodeGenFunction DtorCGF(CGM);
2686 
2687  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
2688  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2689  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2690  FTy, Twine(Buffer, "_dtor"), FI, Loc);
2691  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2692  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2693  FunctionArgList(), Loc, Loc);
2694  // Create a scope with an artificial location for the body of this
2695  // function.
2696  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2697  DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2698  ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2699  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2700  DtorCGF.FinishFunction();
2701  Dtor = Fn;
2702  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2703  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2704  } else {
2705  Dtor = new llvm::GlobalVariable(
2706  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2707  llvm::GlobalValue::PrivateLinkage,
2708  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2709  ID = Dtor;
2710  }
2711  // Register the information for the entry associated with the destructor.
2712  Out.clear();
2713  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2714  DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2715  ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2716  }
2717  return CGM.getLangOpts().OpenMPIsDevice;
2718 }
2719 
2720 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2721  QualType VarType,
2722  StringRef Name) {
2723  std::string Suffix = getName({"artificial", ""});
2724  std::string CacheSuffix = getName({"cache", ""});
2725  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2726  llvm::Value *GAddr =
2727  getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2728  llvm::Value *Args[] = {
2729  emitUpdateLocation(CGF, SourceLocation()),
2730  getThreadID(CGF, SourceLocation()),
2731  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2732  CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2733  /*IsSigned=*/false),
2734  getOrCreateInternalVariable(
2735  CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2736  return Address(
2737  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2738  CGF.EmitRuntimeCall(
2739  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2740  VarLVType->getPointerTo(/*AddrSpace=*/0)),
2741  CGM.getPointerAlign());
2742 }
2743 
2744 static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2745  const RegionCodeGenTy &ThenGen,
2746  const RegionCodeGenTy &ElseGen) {
2747  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2748 
2749  // If the condition constant folds and can be elided, try to avoid emitting
2750  // the condition and the dead arm of the if/else.
2751  bool CondConstant;
2752  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2753  if (CondConstant)
2754  ThenGen(CGF);
2755  else
2756  ElseGen(CGF);
2757  return;
2758  }
2759 
2760  // Otherwise, the condition did not fold, or we couldn't elide it. Just
2761  // emit the conditional branch.
2762  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2763  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2764  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2765  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2766 
2767  // Emit the 'then' code.
2768  CGF.EmitBlock(ThenBlock);
2769  ThenGen(CGF);
2770  CGF.EmitBranch(ContBlock);
2771  // Emit the 'else' code if present.
2772  // There is no need to emit line number for unconditional branch.
2773  (void)ApplyDebugLocation::CreateEmpty(CGF);
2774  CGF.EmitBlock(ElseBlock);
2775  ElseGen(CGF);
2776  // There is no need to emit line number for unconditional branch.
2777  (void)ApplyDebugLocation::CreateEmpty(CGF);
2778  CGF.EmitBranch(ContBlock);
2779  // Emit the continuation block for code after the if.
2780  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2781 }
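When the condition does not constant-fold, the control flow produced above has the usual diamond shape; a pseudo-IR sketch (block names match the createBasicBlock calls, the rest is illustrative):

    //   br i1 %cond, label %omp_if.then, label %omp_if.else
    // omp_if.then:   ; ThenGen body ...        br label %omp_if.end
    // omp_if.else:   ; ElseGen body ...        br label %omp_if.end
    // omp_if.end:    ; code after the construct
    // When ConstantFoldsToSimpleInteger succeeds, only the surviving arm is emitted and no branches are needed.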
2782 
2783 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2784  llvm::Value *OutlinedFn,
2785  ArrayRef<llvm::Value *> CapturedVars,
2786  const Expr *IfCond) {
2787  if (!CGF.HaveInsertPoint())
2788  return;
2789  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2790  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2791  PrePostActionTy &) {
2792  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2793  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2794  llvm::Value *Args[] = {
2795  RTLoc,
2796  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2797  CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2798  llvm::SmallVector<llvm::Value *, 16> RealArgs;
2799  RealArgs.append(std::begin(Args), std::end(Args));
2800  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2801 
2802  llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2803  CGF.EmitRuntimeCall(RTLFn, RealArgs);
2804  };
2805  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2806  PrePostActionTy &) {
2807  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2808  llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2809  // Build calls:
2810  // __kmpc_serialized_parallel(&Loc, GTid);
2811  llvm::Value *Args[] = {RTLoc, ThreadID};
2812  CGF.EmitRuntimeCall(
2813  RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2814 
2815  // OutlinedFn(&GTid, &zero, CapturedStruct);
2816  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2817  /*Name*/ ".zero.addr");
2818  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2819  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2820  // ThreadId for serialized parallels is 0.
2821  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2822  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2823  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2824  RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2825 
2826  // __kmpc_end_serialized_parallel(&Loc, GTid);
2827  llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2828  CGF.EmitRuntimeCall(
2829  RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2830  EndArgs);
2831  };
2832  if (IfCond) {
2833  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2834  } else {
2835  RegionCodeGenTy ThenRCG(ThenGen);
2836  ThenRCG(CGF);
2837  }
2838 }
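Putting the two lambdas together, "#pragma omp parallel" over an outlined function lowers roughly as follows (pseudo-C; omp_outlined and the variable names are illustrative):

    // if (<if-clause> is absent or evaluates to true)
    //   __kmpc_fork_call(&loc, /*argc*/ N, (kmpc_micro)omp_outlined, var1, ..., varN);
    // else {
    //   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
    //   __kmpc_serialized_parallel(&loc, gtid);
    //   kmp_int32 zero = 0;
    //   omp_outlined(&zero, &zero, var1, ..., varN);   // thread id is 0 in the serialized case
    //   __kmpc_end_serialized_parallel(&loc, gtid);
    // }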
2839 
2840 // If we're inside an (outlined) parallel region, use the region info's
2841 // thread-ID variable (it is passed as the first argument of the outlined
2842 // function, "kmp_int32 *gtid"). Otherwise, if we're in regular serial code,
2843 // get the thread ID by calling kmp_int32 __kmpc_global_thread_num(ident_t *loc),
2844 // stash this thread ID in a temporary and
2845 // return the address of that temp.
2846 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2847  SourceLocation Loc) {
2848  if (auto *OMPRegionInfo =
2849  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2850  if (OMPRegionInfo->getThreadIDVariable())
2851  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2852 
2853  llvm::Value *ThreadID = getThreadID(CGF, Loc);
2854  QualType Int32Ty =
2855  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2856  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2857  CGF.EmitStoreOfScalar(ThreadID,
2858  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2859 
2860  return ThreadIDTemp;
2861 }
2862 
2863 llvm::Constant *
2864 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2865  const llvm::Twine &Name) {
2866  SmallString<256> Buffer;
2867  llvm::raw_svector_ostream Out(Buffer);
2868  Out << Name;
2869  StringRef RuntimeName = Out.str();
2870  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2871  if (Elem.second) {
2872  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2873  "OMP internal variable has different type than requested");
2874  return &*Elem.second;
2875  }
2876 
2877  return Elem.second = new llvm::GlobalVariable(
2878  CGM.getModule(), Ty, /*IsConstant*/ false,
2879  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2880  Elem.first());
2881 }
2882 
2883 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2884  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2885  std::string Name = getName({Prefix, "var"});
2886  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2887 }
2888 
2889 namespace {
2890 /// Common pre(post)-action for different OpenMP constructs.
2891 class CommonActionTy final : public PrePostActionTy {
2892  llvm::Value *EnterCallee;
2893  ArrayRef<llvm::Value *> EnterArgs;
2894  llvm::Value *ExitCallee;
2895  ArrayRef<llvm::Value *> ExitArgs;
2896  bool Conditional;
2897  llvm::BasicBlock *ContBlock = nullptr;
2898 
2899 public:
2900  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2901  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2902  bool Conditional = false)
2903  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2904  ExitArgs(ExitArgs), Conditional(Conditional) {}
2905  void Enter(CodeGenFunction &CGF) override {
2906  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2907  if (Conditional) {
2908  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2909  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2910  ContBlock = CGF.createBasicBlock("omp_if.end");
2911  // Generate the branch (If-stmt)
2912  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2913  CGF.EmitBlock(ThenBlock);
2914  }
2915  }
2916  void Done(CodeGenFunction &CGF) {
2917  // Emit the rest of blocks/branches
2918  CGF.EmitBranch(ContBlock);
2919  CGF.EmitBlock(ContBlock, true);
2920  }
2921  void Exit(CodeGenFunction &CGF) override {
2922  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2923  }
2924 };
2925 } // anonymous namespace
2926 
2927 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2928  StringRef CriticalName,
2929  const RegionCodeGenTy &CriticalOpGen,
2930  SourceLocation Loc, const Expr *Hint) {
2931  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2932  // CriticalOpGen();
2933  // __kmpc_end_critical(ident_t *, gtid, Lock);
2934  // Prepare arguments and build a call to __kmpc_critical
2935  if (!CGF.HaveInsertPoint())
2936  return;
2937  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2938  getCriticalRegionLock(CriticalName)};
2939  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2940  std::end(Args));
2941  if (Hint) {
2942  EnterArgs.push_back(CGF.Builder.CreateIntCast(
2943  CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2944  }
2945  CommonActionTy Action(
2946  createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2947  : OMPRTL__kmpc_critical),
2948  EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2949  CriticalOpGen.setAction(Action);
2950  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2951 }
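// Illustrative sketch of the lowering performed above; the construct name and
// hint value are assumptions for the example, not taken from the code.
//
//   #pragma omp critical(update) hint(omp_lock_hint_contended)
//     ++counter;
//
// becomes, approximately:
//
//   __kmpc_critical_with_hint(&loc, gtid, &.gomp_critical_user_update.var, hint);
//   ++counter;
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_update.var);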
2952 
2953 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2954  const RegionCodeGenTy &MasterOpGen,
2955  SourceLocation Loc) {
2956  if (!CGF.HaveInsertPoint())
2957  return;
2958  // if(__kmpc_master(ident_t *, gtid)) {
2959  // MasterOpGen();
2960  // __kmpc_end_master(ident_t *, gtid);
2961  // }
2962  // Prepare arguments and build a call to __kmpc_master
2963  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2964  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2965  createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2966  /*Conditional=*/true);
2967  MasterOpGen.setAction(Action);
2968  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2969  Action.Done(CGF);
2970 }
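// Illustrative sketch of the conditional lowering above (names are informal):
//
//   #pragma omp master
//     init();
//
// becomes, approximately:
//
//   if (__kmpc_master(&loc, gtid)) {
//     init();
//     __kmpc_end_master(&loc, gtid);
//   }
//
// Only the thread for which __kmpc_master() returns non-zero runs the body.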
2971 
2972 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2973  SourceLocation Loc) {
2974  if (!CGF.HaveInsertPoint())
2975  return;
2976  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2977  llvm::Value *Args[] = {
2978  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2979  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2980  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2981  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2982  Region->emitUntiedSwitch(CGF);
2983 }
2984 
2985 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2986  const RegionCodeGenTy &TaskgroupOpGen,
2987  SourceLocation Loc) {
2988  if (!CGF.HaveInsertPoint())
2989  return;
2990  // __kmpc_taskgroup(ident_t *, gtid);
2991  // TaskgroupOpGen();
2992  // __kmpc_end_taskgroup(ident_t *, gtid);
2993  // Prepare arguments and build a call to __kmpc_taskgroup
2994  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2995  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2996  createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2997  Args);
2998  TaskgroupOpGen.setAction(Action);
2999  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3000 }
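// Illustrative sketch (informal): '#pragma omp taskgroup { ... }' is bracketed
// by __kmpc_taskgroup(&loc, gtid) and __kmpc_end_taskgroup(&loc, gtid), with the
// region body emitted in between by TaskgroupOpGen.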
3001 
3002 /// Given an array of pointers to variables, project the address of a
3003 /// given variable.
3004 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3005  unsigned Index, const VarDecl *Var) {
3006  // Pull out the pointer to the variable.
3007  Address PtrAddr =
3008  CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
3009  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3010 
3011  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3012  Addr = CGF.Builder.CreateElementBitCast(
3013  Addr, CGF.ConvertTypeForMem(Var->getType()));
3014  return Addr;
3015 }
3016 
3017 static llvm::Value *emitCopyprivateCopyFunction(
3018  CodeGenModule &CGM, llvm::Type *ArgsType,
3019  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3020  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3021  SourceLocation Loc) {
3022  ASTContext &C = CGM.getContext();
3023  // void copy_func(void *LHSArg, void *RHSArg);
3024  FunctionArgList Args;
3025  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3026  ImplicitParamDecl::Other);
3027  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3028  ImplicitParamDecl::Other);
3029  Args.push_back(&LHSArg);
3030  Args.push_back(&RHSArg);
3031  const auto &CGFI =
3032  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3033  std::string Name =
3034  CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3035  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3036  llvm::GlobalValue::InternalLinkage, Name,
3037  &CGM.getModule());
3038  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3039  Fn->setDoesNotRecurse();
3040  CodeGenFunction CGF(CGM);
3041  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3042  // Dest = (void*[n])(LHSArg);
3043  // Src = (void*[n])(RHSArg);
3044  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3045  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3046  ArgsType), CGF.getPointerAlign());
3047  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3048  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3049  ArgsType), CGF.getPointerAlign());
3050  // *(Type0*)Dst[0] = *(Type0*)Src[0];
3051  // *(Type1*)Dst[1] = *(Type1*)Src[1];
3052  // ...
3053  // *(Typen*)Dst[n] = *(Typen*)Src[n];
3054  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3055  const auto *DestVar =
3056  cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3057  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3058 
3059  const auto *SrcVar =
3060  cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3061  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3062 
3063  const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3064  QualType Type = VD->getType();
3065  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3066  }
3067  CGF.FinishFunction();
3068  return Fn;
3069 }
3070 
3071 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3072  const RegionCodeGenTy &SingleOpGen,
3073  SourceLocation Loc,
3074  ArrayRef<const Expr *> CopyprivateVars,
3075  ArrayRef<const Expr *> SrcExprs,
3076  ArrayRef<const Expr *> DstExprs,
3077  ArrayRef<const Expr *> AssignmentOps) {
3078  if (!CGF.HaveInsertPoint())
3079  return;
3080  assert(CopyprivateVars.size() == SrcExprs.size() &&
3081  CopyprivateVars.size() == DstExprs.size() &&
3082  CopyprivateVars.size() == AssignmentOps.size());
3083  ASTContext &C = CGM.getContext();
3084  // int32 did_it = 0;
3085  // if(__kmpc_single(ident_t *, gtid)) {
3086  // SingleOpGen();
3087  // __kmpc_end_single(ident_t *, gtid);
3088  // did_it = 1;
3089  // }
3090  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3091  // <copy_func>, did_it);
3092 
3093  Address DidIt = Address::invalid();
3094  if (!CopyprivateVars.empty()) {
3095  // int32 did_it = 0;
3096  QualType KmpInt32Ty =
3097  C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3098  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3099  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3100  }
3101  // Prepare arguments and build a call to __kmpc_single
3102  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3103  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3104  createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
3105  /*Conditional=*/true);
3106  SingleOpGen.setAction(Action);
3107  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3108  if (DidIt.isValid()) {
3109  // did_it = 1;
3110  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3111  }
3112  Action.Done(CGF);
3113  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3114  // <copy_func>, did_it);
3115  if (DidIt.isValid()) {
3116  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3117  QualType CopyprivateArrayTy =
3118  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3119  /*IndexTypeQuals=*/0);
3120  // Create a list of all private variables for copyprivate.
3121  Address CopyprivateList =
3122  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3123  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3124  Address Elem = CGF.Builder.CreateConstArrayGEP(
3125  CopyprivateList, I, CGF.getPointerSize());
3126  CGF.Builder.CreateStore(
3127  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3128  CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3129  Elem);
3130  }
3131  // Build a function that copies private values from the single region to all
3132  // other threads in the corresponding parallel region.
3133  llvm::Value *CpyFn = emitCopyprivateCopyFunction(
3134  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3135  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3136  llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3137  Address CL =
3138  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3139  CGF.VoidPtrTy);
3140  llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3141  llvm::Value *Args[] = {
3142  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3143  getThreadID(CGF, Loc), // i32 <gtid>
3144  BufSize, // size_t <buf_size>
3145  CL.getPointer(), // void *<copyprivate list>
3146  CpyFn, // void (*) (void *, void *) <copy_func>
3147  DidItVal // i32 did_it
3148  };
3149  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
3150  }
3151 }
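// Illustrative sketch of the lowering above for a 'single' with copyprivate;
// variable names and the buffer size are assumptions for the example.
//
//   #pragma omp single copyprivate(x)
//     x = read_input();
//
// becomes, approximately:
//
//   int32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     x = read_input();
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, buf_size, &cpr_list, copy_func, did_it);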
3152 
3153 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3154  const RegionCodeGenTy &OrderedOpGen,
3155  SourceLocation Loc, bool IsThreads) {
3156  if (!CGF.HaveInsertPoint())
3157  return;
3158  // __kmpc_ordered(ident_t *, gtid);
3159  // OrderedOpGen();
3160  // __kmpc_end_ordered(ident_t *, gtid);
3161  // Prepare arguments and build a call to __kmpc_ordered
3162  if (IsThreads) {
3163  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3164  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3165  createRuntimeFunction(OMPRTL__kmpc_end_ordered),
3166  Args);
3167  OrderedOpGen.setAction(Action);
3168  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3169  return;
3170  }
3171  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3172 }
3173 
3174 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3175  OpenMPDirectiveKind Kind, bool EmitChecks,
3176  bool ForceSimpleCall) {
3177  if (!CGF.HaveInsertPoint())
3178  return;
3179  // Build call __kmpc_cancel_barrier(loc, thread_id);
3180  // Build call __kmpc_barrier(loc, thread_id);
3181  unsigned Flags;
3182  if (Kind == OMPD_for)
3183  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3184  else if (Kind == OMPD_sections)
3185  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3186  else if (Kind == OMPD_single)
3187  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3188  else if (Kind == OMPD_barrier)
3189  Flags = OMP_IDENT_BARRIER_EXPL;
3190  else
3191  Flags = OMP_IDENT_BARRIER_IMPL;
3192  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3193  // thread_id);
3194  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3195  getThreadID(CGF, Loc)};
3196  if (auto *OMPRegionInfo =
3197  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3198  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3199  llvm::Value *Result = CGF.EmitRuntimeCall(
3200  createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
3201  if (EmitChecks) {
3202  // if (__kmpc_cancel_barrier()) {
3203  // exit from construct;
3204  // }
3205  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3206  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3207  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3208  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3209  CGF.EmitBlock(ExitBB);
3210  // exit from construct;
3211  CodeGenFunction::JumpDest CancelDestination =
3212  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3213  CGF.EmitBranchThroughCleanup(CancelDestination);
3214  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3215  }
3216  return;
3217  }
3218  }
3219  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
3220 }
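// Illustrative sketch: an explicit '#pragma omp barrier' reaches this point with
// Kind == OMPD_barrier and is emitted roughly as
//   __kmpc_barrier(&loc<OMP_IDENT_BARRIER_EXPL>, gtid);
// while the implicit barrier at the end of a worksharing 'for' carries the
// OMP_IDENT_BARRIER_IMPL_FOR flag in the location. Inside a region with a
// 'cancel' construct, __kmpc_cancel_barrier() is used instead and a non-zero
// result branches to the cancellation exit.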
3221 
3222 /// Map the OpenMP loop schedule to the runtime enumeration.
3223 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3224  bool Chunked, bool Ordered) {
3225  switch (ScheduleKind) {
3226  case OMPC_SCHEDULE_static:
3227  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3228  : (Ordered ? OMP_ord_static : OMP_sch_static);
3229  case OMPC_SCHEDULE_dynamic:
3230  return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
3231  case OMPC_SCHEDULE_guided:
3232  return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
3233  case OMPC_SCHEDULE_runtime:
3234  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3235  case OMPC_SCHEDULE_auto:
3236  return Ordered ? OMP_ord_auto : OMP_sch_auto;
3237  case OMPC_SCHEDULE_unknown:
3238  assert(!Chunked && "chunk was specified but schedule kind not known");
3239  return Ordered ? OMP_ord_static : OMP_sch_static;
3240  }
3241  llvm_unreachable("Unexpected runtime schedule");
3242 }
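// Illustrative mapping examples for the helper above (enumerator names follow
// the switch; the concrete numeric values are defined by the runtime):
//   schedule(static)       -> OMP_sch_static
//   schedule(static, 16)   -> OMP_sch_static_chunked
//   schedule(dynamic, 4)   -> OMP_sch_dynamic_chunked
//   schedule(runtime)      -> OMP_sch_runtime
//   schedule(auto)         -> OMP_sch_auto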
3243 
3244 /// Map the OpenMP distribute schedule to the runtime enumeration.
3245 static OpenMPSchedType
3246 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
3247  // only static is allowed for dist_schedule
3248  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
3249 }
3250 
3251 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
3252  bool Chunked) const {
3253  OpenMPSchedType Schedule =
3254  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3255  return Schedule == OMP_sch_static;
3256 }
3257 
3258 bool CGOpenMPRuntime::isStaticNonchunked(
3259  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3260  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3261  return Schedule == OMP_dist_sch_static;
3262 }
3263 
3264 
3265 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
3266  OpenMPSchedType Schedule =
3267  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3268  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3269  return Schedule != OMP_sch_static;
3270 }
3271 
3272 
3273 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
3274  OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2) {
3275  int Modifier = 0;
3276  switch (M1) {
3277  case OMPC_SCHEDULE_MODIFIER_monotonic:
3278  Modifier = OMP_sch_modifier_monotonic;
3279  break;
3280  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3281  Modifier = OMP_sch_modifier_nonmonotonic;
3282  break;
3283  case OMPC_SCHEDULE_MODIFIER_simd:
3284  if (Schedule == OMP_sch_static_chunked)
3285  Schedule = OMP_sch_static_balanced_chunked;
3286  break;
3287  case OMPC_SCHEDULE_MODIFIER_last:
3288  case OMPC_SCHEDULE_MODIFIER_unknown:
3289  break;
3290  }
3291  switch (M2) {
3292  case OMPC_SCHEDULE_MODIFIER_monotonic:
3293  Modifier = OMP_sch_modifier_monotonic;
3294  break;
3295  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3296  Modifier = OMP_sch_modifier_nonmonotonic;
3297  break;
3298  case OMPC_SCHEDULE_MODIFIER_simd:
3299  if (Schedule == OMP_sch_static_chunked)
3300  Schedule = OMP_sch_static_balanced_chunked;
3301  break;
3302  case OMPC_SCHEDULE_MODIFIER_last:
3303  case OMPC_SCHEDULE_MODIFIER_unknown:
3304  break;
3305  }
3306  return Schedule | Modifier;
3307 }
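// Illustrative sketch: the modifier bits are OR-ed into the schedule value, so a
// clause such as 'schedule(nonmonotonic: dynamic, 4)' yields approximately
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// which is the value passed to __kmpc_dispatch_init_* below.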
3308 
3309 void CGOpenMPRuntime::emitForDispatchInit(
3310  CodeGenFunction &CGF, SourceLocation Loc,
3311  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3312  bool Ordered, const DispatchRTInput &DispatchValues) {
3313  if (!CGF.HaveInsertPoint())
3314  return;
3315  OpenMPSchedType Schedule = getRuntimeSchedule(
3316  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3317  assert(Ordered ||
3318  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3319  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3320  Schedule != OMP_sch_static_balanced_chunked));
3321  // Call __kmpc_dispatch_init(
3322  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3323  // kmp_int[32|64] lower, kmp_int[32|64] upper,
3324  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
3325 
3326  // If the Chunk was not specified in the clause - use default value 1.
3327  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3328  : CGF.Builder.getIntN(IVSize, 1);
3329  llvm::Value *Args[] = {
3330  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3331  CGF.Builder.getInt32(addMonoNonMonoModifier(
3332  Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3333  DispatchValues.LB, // Lower
3334  DispatchValues.UB, // Upper
3335  CGF.Builder.getIntN(IVSize, 1), // Stride
3336  Chunk // Chunk
3337  };
3338  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3339 }
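// Illustrative sketch of a dynamically scheduled loop driven by the call emitted
// above together with __kmpc_dispatch_next (a signed 32-bit IV is assumed):
//
//   #pragma omp for schedule(dynamic, 4)
//   for (int i = 0; i < n; ++i) body(i);
//
// becomes, approximately:
//
//   __kmpc_dispatch_init_4(&loc, gtid, sched, /*lb=*/0, /*ub=*/n-1, /*st=*/1,
//                          /*chunk=*/4);
//   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &stride))
//     for (int i = lo; i <= hi; ++i) body(i);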
3340 
3341 static void emitForStaticInitCall(
3342  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3343  llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
3344  OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
3345  const CGOpenMPRuntime::StaticRTInput &Values) {
3346  if (!CGF.HaveInsertPoint())
3347  return;
3348 
3349  assert(!Values.Ordered);
3350  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3351  Schedule == OMP_sch_static_balanced_chunked ||
3352  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3353  Schedule == OMP_dist_sch_static ||
3354  Schedule == OMP_dist_sch_static_chunked);
3355 
3356  // Call __kmpc_for_static_init(
3357  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3358  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3359  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3360  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3361  llvm::Value *Chunk = Values.Chunk;
3362  if (Chunk == nullptr) {
3363  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3364  Schedule == OMP_dist_sch_static) &&
3365  "expected static non-chunked schedule");
3366  // If the Chunk was not specified in the clause - use default value 1.
3367  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3368  } else {
3369  assert((Schedule == OMP_sch_static_chunked ||
3370  Schedule == OMP_sch_static_balanced_chunked ||
3371  Schedule == OMP_ord_static_chunked ||
3372  Schedule == OMP_dist_sch_static_chunked) &&
3373  "expected static chunked schedule");
3374  }
3375  llvm::Value *Args[] = {
3376  UpdateLocation,
3377  ThreadId,
3378  CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3379  M2)), // Schedule type
3380  Values.IL.getPointer(), // &isLastIter
3381  Values.LB.getPointer(), // &LB
3382  Values.UB.getPointer(), // &UB
3383  Values.ST.getPointer(), // &Stride
3384  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3385  Chunk // Chunk
3386  };
3387  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3388 }
3389 
3390 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3391  SourceLocation Loc,
3392  OpenMPDirectiveKind DKind,
3393  const OpenMPScheduleTy &ScheduleKind,
3394  const StaticRTInput &Values) {
3395  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3396  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3397  assert(isOpenMPWorksharingDirective(DKind) &&
3398  "Expected loop-based or sections-based directive.");
3399  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3400  isOpenMPLoopDirective(DKind)
3401  ? OMP_IDENT_WORK_LOOP
3402  : OMP_IDENT_WORK_SECTIONS);
3403  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3404  llvm::Constant *StaticInitFunction =
3405  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3406  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3407  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3408 }
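// Illustrative sketch of a statically scheduled loop driven by the call above
// (a 32-bit IV is assumed); the runtime fills in this thread's chunk bounds:
//
//   __kmpc_for_static_init_4(&loc, gtid, OMP_sch_static, &last, &lb, &ub, &st,
//                            /*incr=*/1, /*chunk=*/1);
//   for (int i = lb; i <= ub; ++i) body(i);
//   __kmpc_for_static_fini(&loc, gtid);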
3409 
3410 void CGOpenMPRuntime::emitDistributeStaticInit(
3411  CodeGenFunction &CGF, SourceLocation Loc,
3412  OpenMPDistScheduleClauseKind SchedKind,
3413  const CGOpenMPRuntime::StaticRTInput &Values) {
3414  OpenMPSchedType ScheduleNum =
3415  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3416  llvm::Value *UpdatedLocation =
3417  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3418  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3419  llvm::Constant *StaticInitFunction =
3420  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3421  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3422  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3423  OMPC_SCHEDULE_MODIFIER_unknown, Values);
3424 }
3425 
3426 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3427  SourceLocation Loc,
3428  OpenMPDirectiveKind DKind) {
3429  if (!CGF.HaveInsertPoint())
3430  return;
3431  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3432  llvm::Value *Args[] = {
3433  emitUpdateLocation(CGF, Loc,
3434  isOpenMPDistributeDirective(DKind)
3435  ? OMP_IDENT_WORK_DISTRIBUTE
3436  : isOpenMPLoopDirective(DKind)
3437  ? OMP_IDENT_WORK_LOOP
3438  : OMP_IDENT_WORK_SECTIONS),
3439  getThreadID(CGF, Loc)};
3440  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3441  Args);
3442 }
3443 
3444 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3445  SourceLocation Loc,
3446  unsigned IVSize,
3447  bool IVSigned) {
3448  if (!CGF.HaveInsertPoint())
3449  return;
3450  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3451  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3452  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3453 }
3454 
3455 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3456  SourceLocation Loc, unsigned IVSize,
3457  bool IVSigned, Address IL,
3458  Address LB, Address UB,
3459  Address ST) {
3460  // Call __kmpc_dispatch_next(
3461  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3462  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3463  // kmp_int[32|64] *p_stride);
3464  llvm::Value *Args[] = {
3465  emitUpdateLocation(CGF, Loc),
3466  getThreadID(CGF, Loc),
3467  IL.getPointer(), // &isLastIter
3468  LB.getPointer(), // &Lower
3469  UB.getPointer(), // &Upper
3470  ST.getPointer() // &Stride
3471  };
3472  llvm::Value *Call =
3473  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3474  return CGF.EmitScalarConversion(
3475  Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3476  CGF.getContext().BoolTy, Loc);
3477 }
3478 
3479 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3480  llvm::Value *NumThreads,
3481  SourceLocation Loc) {
3482  if (!CGF.HaveInsertPoint())
3483  return;
3484  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3485  llvm::Value *Args[] = {
3486  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3487  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3488  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3489  Args);
3490 }
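// Illustrative sketch: 'num_threads(8)' on a 'parallel' directive is emitted as
//   __kmpc_push_num_threads(&loc, gtid, 8);
// shortly before the __kmpc_fork_call for that parallel region.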
3491 
3492 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3493  OpenMPProcBindClauseKind ProcBind,
3494  SourceLocation Loc) {
3495  if (!CGF.HaveInsertPoint())
3496  return;
3497  // Constants for proc bind value accepted by the runtime.
3498  enum ProcBindTy {
3499  ProcBindFalse = 0,
3500  ProcBindTrue,
3501  ProcBindMaster,
3502  ProcBindClose,
3503  ProcBindSpread,
3504  ProcBindIntel,
3505  ProcBindDefault
3506  } RuntimeProcBind;
3507  switch (ProcBind) {
3508  case OMPC_PROC_BIND_master:
3509  RuntimeProcBind = ProcBindMaster;
3510  break;
3511  case OMPC_PROC_BIND_close:
3512  RuntimeProcBind = ProcBindClose;
3513  break;
3514  case OMPC_PROC_BIND_spread:
3515  RuntimeProcBind = ProcBindSpread;
3516  break;
3517  case OMPC_PROC_BIND_unknown:
3518  llvm_unreachable("Unsupported proc_bind value.");
3519  }
3520  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3521  llvm::Value *Args[] = {
3522  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3523  llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3524  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3525 }
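// Illustrative sketch: 'proc_bind(close)' maps to ProcBindClose (3 in the enum
// above), so the emitted call is approximately
//   __kmpc_push_proc_bind(&loc, gtid, 3);
// issued before the corresponding __kmpc_fork_call.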
3526 
3527 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3528  SourceLocation Loc) {
3529  if (!CGF.HaveInsertPoint())
3530  return;
3531  // Build call void __kmpc_flush(ident_t *loc)
3532  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3533  emitUpdateLocation(CGF, Loc));
3534 }
3535 
3536 namespace {
3537 /// Indexes of fields for type kmp_task_t.
3538 enum KmpTaskTFields {
3539  /// List of shared variables.
3540  KmpTaskTShareds,
3541  /// Task routine.
3542  KmpTaskTRoutine,
3543  /// Partition id for the untied tasks.
3544  KmpTaskTPartId,
3545  /// Function with call of destructors for private variables.
3546  Data1,
3547  /// Task priority.
3548  Data2,
3549  /// (Taskloops only) Lower bound.
3550  KmpTaskTLowerBound,
3551  /// (Taskloops only) Upper bound.
3552  KmpTaskTUpperBound,
3553  /// (Taskloops only) Stride.
3554  KmpTaskTStride,
3555  /// (Taskloops only) Is last iteration flag.
3556  KmpTaskTLastIter,
3557  /// (Taskloops only) Reduction data.
3558  KmpTaskTReductions,
3559 };
3560 } // anonymous namespace
3561 
3562 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3563  return OffloadEntriesTargetRegion.empty() &&
3564  OffloadEntriesDeviceGlobalVar.empty();
3565 }
3566 
3567 /// Initialize target region entry.
3568 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3569  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3570  StringRef ParentName, unsigned LineNum,
3571  unsigned Order) {
3572  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3573  "only required for the device "
3574  "code generation.");
3575  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3576  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3577  OMPTargetRegionEntryTargetRegion);
3578  ++OffloadingEntriesNum;
3579 }
3580 
3581 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3582  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3583  StringRef ParentName, unsigned LineNum,
3584  llvm::Constant *Addr, llvm::Constant *ID,
3585  OMPTargetRegionEntryKind Flags) {
3586  // If we are emitting code for a target, the entry is already initialized,
3587  // only has to be registered.
3588  if (CGM.getLangOpts().OpenMPIsDevice) {
3589  if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3590  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3591  DiagnosticsEngine::Error,
3592  "Unable to find target region on line '%0' in the device code.");
3593  CGM.getDiags().Report(DiagID) << LineNum;
3594  return;
3595  }
3596  auto &Entry =
3597  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3598  assert(Entry.isValid() && "Entry not initialized!");
3599  Entry.setAddress(Addr);
3600  Entry.setID(ID);
3601  Entry.setFlags(Flags);
3602  } else {
3603  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3604  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3605  ++OffloadingEntriesNum;
3606  }
3607 }
3608 
3609 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3610  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3611  unsigned LineNum) const {
3612  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3613  if (PerDevice == OffloadEntriesTargetRegion.end())
3614  return false;
3615  auto PerFile = PerDevice->second.find(FileID);
3616  if (PerFile == PerDevice->second.end())
3617  return false;
3618  auto PerParentName = PerFile->second.find(ParentName);
3619  if (PerParentName == PerFile->second.end())
3620  return false;
3621  auto PerLine = PerParentName->second.find(LineNum);
3622  if (PerLine == PerParentName->second.end())
3623  return false;
3624  // Fail if this entry is already registered.
3625  if (PerLine->second.getAddress() || PerLine->second.getID())
3626  return false;
3627  return true;
3628 }
3629 
3630 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3631  const OffloadTargetRegionEntryInfoActTy &Action) {
3632  // Scan all target region entries and perform the provided action.
3633  for (const auto &D : OffloadEntriesTargetRegion)
3634  for (const auto &F : D.second)
3635  for (const auto &P : F.second)
3636  for (const auto &L : P.second)
3637  Action(D.first, F.first, P.first(), L.first, L.second);
3638 }
3639 
3640 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3641  initializeDeviceGlobalVarEntryInfo(StringRef Name,
3642  OMPTargetGlobalVarEntryKind Flags,
3643  unsigned Order) {
3644  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3645  "only required for the device "
3646  "code generation.");
3647  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3648  ++OffloadingEntriesNum;
3649 }
3650 
3651 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3652  registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3653  CharUnits VarSize,
3654  OMPTargetGlobalVarEntryKind Flags,
3655  llvm::GlobalValue::LinkageTypes Linkage) {
3656  if (CGM.getLangOpts().OpenMPIsDevice) {
3657  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3658  assert(Entry.isValid() && Entry.getFlags() == Flags &&
3659  "Entry not initialized!");
3660  assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3661  "Resetting with the new address.");
3662  if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName))
3663  return;
3664  Entry.setAddress(Addr);
3665  Entry.setVarSize(VarSize);
3666  Entry.setLinkage(Linkage);
3667  } else {
3668  if (hasDeviceGlobalVarEntryInfo(VarName))
3669  return;
3670  OffloadEntriesDeviceGlobalVar.try_emplace(
3671  VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3672  ++OffloadingEntriesNum;
3673  }
3674 }
3675 
3676 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3677  actOnDeviceGlobalVarEntriesInfo(
3678  const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3679  // Scan all target region entries and perform the provided action.
3680  for (const auto &E : OffloadEntriesDeviceGlobalVar)
3681  Action(E.getKey(), E.getValue());
3682 }
3683 
3684 llvm::Function *
3685 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3686  // If we don't have entries or if we are emitting code for the device, we
3687  // don't need to do anything.
3688  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3689  return nullptr;
3690 
3691  llvm::Module &M = CGM.getModule();
3692  ASTContext &C = CGM.getContext();
3693 
3694  // Get list of devices we care about
3695  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3696 
3697  // We should be creating an offloading descriptor only if there are devices
3698  // specified.
3699  assert(!Devices.empty() && "No OpenMP offloading devices??");
3700 
3701  // Create the external variables that will point to the begin and end of the
3702  // host entries section. These will be defined by the linker.
3703  llvm::Type *OffloadEntryTy =
3704  CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3705  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3706  auto *HostEntriesBegin = new llvm::GlobalVariable(
3707  M, OffloadEntryTy, /*isConstant=*/true,
3708  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3709  EntriesBeginName);
3710  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3711  auto *HostEntriesEnd =
3712  new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3713  llvm::GlobalValue::ExternalLinkage,
3714  /*Initializer=*/nullptr, EntriesEndName);
3715 
3716  // Create all device images
3717  auto *DeviceImageTy = cast<llvm::StructType>(
3718  CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3719  ConstantInitBuilder DeviceImagesBuilder(CGM);
3720  ConstantArrayBuilder DeviceImagesEntries =
3721  DeviceImagesBuilder.beginArray(DeviceImageTy);
3722 
3723  for (const llvm::Triple &Device : Devices) {
3724  StringRef T = Device.getTriple();
3725  std::string BeginName = getName({"omp_offloading", "img_start", ""});
3726  auto *ImgBegin = new llvm::GlobalVariable(
3727  M, CGM.Int8Ty, /*isConstant=*/true,
3728  llvm::GlobalValue::ExternalWeakLinkage,
3729  /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3730  std::string EndName = getName({"omp_offloading", "img_end", ""});
3731  auto *ImgEnd = new llvm::GlobalVariable(
3732  M, CGM.Int8Ty, /*isConstant=*/true,
3733  llvm::GlobalValue::ExternalWeakLinkage,
3734  /*Initializer=*/nullptr, Twine(EndName).concat(T));
3735 
3736  llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3737  HostEntriesEnd};
3738  createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
3739  DeviceImagesEntries);
3740  }
3741 
3742  // Create device images global array.
3743  std::string ImagesName = getName({"omp_offloading", "device_images"});
3744  llvm::GlobalVariable *DeviceImages =
3745  DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3746  CGM.getPointerAlign(),
3747  /*isConstant=*/true);
3748  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3749 
3750  // This is a zero-valued index array used in the creation of the GEP constant
3751  // expressions below.
3751  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3752  llvm::Constant::getNullValue(CGM.Int32Ty)};
3753 
3754  // Create the target region descriptor.
3755  llvm::Constant *Data[] = {
3756  llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3757  llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3758  DeviceImages, Index),
3759  HostEntriesBegin, HostEntriesEnd};
3760  std::string Descriptor = getName({"omp_offloading", "descriptor"});
3761  llvm::GlobalVariable *Desc = createGlobalStruct(
3762  CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
3763 
3764  // Emit code to register or unregister the descriptor at execution
3765  // startup or closing, respectively.
3766 
3767  llvm::Function *UnRegFn;
3768  {
3769  FunctionArgList Args;
3770  ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3771  Args.push_back(&DummyPtr);
3772 
3773  CodeGenFunction CGF(CGM);
3774  // Disable debug info for global (de-)initializer because they are not part
3775  // of some particular construct.
3776  CGF.disableDebugInfo();
3777  const auto &FI =
3778  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3779  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3780  std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
3781  UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
3782  CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
3783  CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
3784  Desc);
3785  CGF.FinishFunction();
3786  }
3787  llvm::Function *RegFn;
3788  {
3789  CodeGenFunction CGF(CGM);
3790  // Disable debug info for global (de-)initializer because they are not part
3791  // of some particular construct.
3792  CGF.disableDebugInfo();
3793  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
3794  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3795 
3796  // Encode offload target triples into the registration function name. It
3797  // will serve as a comdat key for the registration/unregistration code for
3798  // this particular combination of offloading targets.
3799  SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
3800  RegFnNameParts[0] = "omp_offloading";
3801  RegFnNameParts[1] = "descriptor_reg";
3802  llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
3803  [](const llvm::Triple &T) -> const std::string& {
3804  return T.getTriple();
3805  });
3806  llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
3807  std::string Descriptor = getName(RegFnNameParts);
3808  RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
3809  CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
3810  CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
3811  // Create a variable to drive the registration and unregistration of the
3812  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3813  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
3814  SourceLocation(), nullptr, C.CharTy,
3815  ImplicitParamDecl::Other);
3816  CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3817  CGF.FinishFunction();
3818  }
3819  if (CGM.supportsCOMDAT()) {
3820  // It is sufficient to call registration function only once, so create a
3821  // COMDAT group for registration/unregistration functions and associated
3822  // data. That would reduce startup time and code size. Registration
3823  // function serves as a COMDAT group key.
3824  llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
3825  RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3826  RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3827  RegFn->setComdat(ComdatKey);
3828  UnRegFn->setComdat(ComdatKey);
3829  DeviceImages->setComdat(ComdatKey);
3830  Desc->setComdat(ComdatKey);
3831  }
3832  return RegFn;
3833 }
3834 
3835 void CGOpenMPRuntime::createOffloadEntry(
3836  llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3837  llvm::GlobalValue::LinkageTypes Linkage) {
3838  StringRef Name = Addr->getName();
3839  llvm::Module &M = CGM.getModule();
3840  llvm::LLVMContext &C = M.getContext();
3841 
3842  // Create constant string with the name.
3843  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3844 
3845  std::string StringName = getName({"omp_offloading", "entry_name"});
3846  auto *Str = new llvm::GlobalVariable(
3847  M, StrPtrInit->getType(), /*isConstant=*/true,
3848  llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3849  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3850 
3851  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3852  llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3853  llvm::ConstantInt::get(CGM.SizeTy, Size),
3854  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3855  llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3856  std::string EntryName = getName({"omp_offloading", "entry", ""});
3857  llvm::GlobalVariable *Entry = createGlobalStruct(
3858  CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3859  Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3860 
3861  // The entry has to be created in the section the linker expects it to be.
3862  std::string Section = getName({"omp_offloading", "entries"});
3863  Entry->setSection(Section);
3864 }
3865 
3866 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3867  // Emit the offloading entries and metadata so that the device codegen side
3868  // can easily figure out what to emit. The produced metadata looks like
3869  // this:
3870  //
3871  // !omp_offload.info = !{!1, ...}
3872  //
3873  // Right now we only generate metadata for functions that contain target
3874  // regions.
3875 
3876  // If we do not have entries, we don't need to do anything.
3877  if (OffloadEntriesInfoManager.empty())
3878  return;
3879 
3880  llvm::Module &M = CGM.getModule();
3881  llvm::LLVMContext &C = M.getContext();
3883  OrderedEntries(OffloadEntriesInfoManager.size());
3884 
3885  // Auxiliary methods to create metadata values and strings.
3886  auto &&GetMDInt = [this](unsigned V) {
3887  return llvm::ConstantAsMetadata::get(
3888  llvm::ConstantInt::get(CGM.Int32Ty, V));
3889  };
3890 
3891  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3892 
3893  // Create the offloading info metadata node.
3894  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3895 
3896  // Create function that emits metadata for each target region entry;
3897  auto &&TargetRegionMetadataEmitter =
3898  [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
3899  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3900  unsigned Line,
3901  const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3902  // Generate metadata for target regions. Each entry of this metadata
3903  // contains:
3904  // - Entry 0 -> Kind of this type of metadata (0).
3905  // - Entry 1 -> Device ID of the file where the entry was identified.
3906  // - Entry 2 -> File ID of the file where the entry was identified.
3907  // - Entry 3 -> Mangled name of the function where the entry was
3908  // identified.
3909  // - Entry 4 -> Line in the file where the entry was identified.
3910  // - Entry 5 -> Order the entry was created.
3911  // The first element of the metadata node is the kind.
3912  llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3913  GetMDInt(FileID), GetMDString(ParentName),
3914  GetMDInt(Line), GetMDInt(E.getOrder())};
3915 
3916  // Save this entry in the right position of the ordered entries array.
3917  OrderedEntries[E.getOrder()] = &E;
3918 
3919  // Add metadata to the named metadata node.
3920  MD->addOperand(llvm::MDNode::get(C, Ops));
3921  };
3922 
3923  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3924  TargetRegionMetadataEmitter);
3925 
3926  // Create function that emits metadata for each device global variable entry;
3927  auto &&DeviceGlobalVarMetadataEmitter =
3928  [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3929  MD](StringRef MangledName,
3930  const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3931  &E) {
3932  // Generate metadata for global variables. Each entry of this metadata
3933  // contains:
3934  // - Entry 0 -> Kind of this type of metadata (1).
3935  // - Entry 1 -> Mangled name of the variable.
3936  // - Entry 2 -> Declare target kind.
3937  // - Entry 3 -> Order the entry was created.
3938  // The first element of the metadata node is the kind.
3939  llvm::Metadata *Ops[] = {
3940  GetMDInt(E.getKind()), GetMDString(MangledName),
3941  GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3942 
3943  // Save this entry in the right position of the ordered entries array.
3944  OrderedEntries[E.getOrder()] = &E;
3945 
3946  // Add metadata to the named metadata node.
3947  MD->addOperand(llvm::MDNode::get(C, Ops));
3948  };
3949 
3951  DeviceGlobalVarMetadataEmitter);
3952 
3953  for (const auto *E : OrderedEntries) {
3954  assert(E && "All ordered entries must exist!");
3955  if (const auto *CE =
3956  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3957  E)) {
3958  if (!CE->getID() || !CE->getAddress()) {
3959  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3960  DiagnosticsEngine::Error,
3961  "Offloading entry for target region is incorrect: either the "
3962  "address or the ID is invalid.");
3963  CGM.getDiags().Report(DiagID);
3964  continue;
3965  }
3966  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3967  CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3968  } else if (const auto *CE =
3969  dyn_cast<OffloadEntriesInfoManagerTy::
3970  OffloadEntryInfoDeviceGlobalVar>(E)) {
3971  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3972  static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3973  CE->getFlags());
3974  switch (Flags) {
3975  case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3976  if (!CE->getAddress()) {
3977  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3978  DiagnosticsEngine::Error,
3979  "Offloading entry for declare target variable is incorrect: the "
3980  "address is invalid.");
3981  CGM.getDiags().Report(DiagID);
3982  continue;
3983  }
3984  // The variable has no definition - no need to add the entry.
3985  if (CE->getVarSize().isZero())
3986  continue;
3987  break;
3988  }
3989  case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3990  assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3991  (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3992  "Declare target link address is set.");
3993  if (CGM.getLangOpts().OpenMPIsDevice)
3994  continue;
3995  if (!CE->getAddress()) {
3996  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3997  DiagnosticsEngine::Error,
3998  "Offloading entry for declare target variable is incorrect: the "
3999  "address is invalid.");
4000  CGM.getDiags().Report(DiagID);
4001  continue;
4002  }
4003  break;
4004  }
4005  createOffloadEntry(CE->getAddress(), CE->getAddress(),
4006  CE->getVarSize().getQuantity(), Flags,
4007  CE->getLinkage());
4008  } else {
4009  llvm_unreachable("Unsupported entry kind.");
4010  }
4011  }
4012 }
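// Illustrative sketch of the !omp_offload.info metadata produced above for one
// target region and one declare-target variable; identifiers and numbers are
// made up, but the field layout follows the entry descriptions in the lambdas:
//
//   !omp_offload.info = !{!0, !1}
//   !0 = !{i32 0, i32 <DeviceID>, i32 <FileID>, !"_Z3foov", i32 42, i32 0}
//   !1 = !{i32 1, !"global_var", i32 <DeclareTargetKind>, i32 1}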
4013 
4014 /// Loads all the offload entries information from the host IR
4015 /// metadata.
4016 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4017  // If we are in target mode, load the metadata from the host IR. This code has
4018  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
4019 
4020  if (!CGM.getLangOpts().OpenMPIsDevice)
4021  return;
4022 
4023  if (CGM.getLangOpts().OMPHostIRFile.empty())
4024  return;
4025 
4026  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4027  if (auto EC = Buf.getError()) {
4028  CGM.getDiags().Report(diag::err_cannot_open_file)
4029  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4030  return;
4031  }
4032 
4033  llvm::LLVMContext C;
4034  auto ME = expectedToErrorOrAndEmitErrors(
4035  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4036 
4037  if (auto EC = ME.getError()) {
4038  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4039  DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4040  CGM.getDiags().Report(DiagID)
4041  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4042  return;
4043  }
4044 
4045  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4046  if (!MD)
4047  return;
4048 
4049  for (llvm::MDNode *MN : MD->operands()) {
4050  auto &&GetMDInt = [MN](unsigned Idx) {
4051  auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4052  return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4053  };
4054 
4055  auto &&GetMDString = [MN](unsigned Idx) {
4056  auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4057  return V->getString();
4058  };
4059 
4060  switch (GetMDInt(0)) {
4061  default:
4062  llvm_unreachable("Unexpected metadata!");
4063  break;
4064  case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4065  OffloadingEntryInfoTargetRegion:
4066  OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4067  /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4068  /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4069  /*Order=*/GetMDInt(5));
4070  break;
4071  case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4072  OffloadingEntryInfoDeviceGlobalVar:
4073  OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4074  /*MangledName=*/GetMDString(1),
4075  static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4076  /*Flags=*/GetMDInt(2)),
4077  /*Order=*/GetMDInt(3));
4078  break;
4079  }
4080  }
4081 }
4082 
4083 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4084  if (!KmpRoutineEntryPtrTy) {
4085  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4086  ASTContext &C = CGM.getContext();
4087  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4088  FunctionProtoType::ExtProtoInfo EPI;
4089  KmpRoutineEntryPtrQTy = C.getPointerType(
4090  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4091  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4092  }
4093 }
4094 
4095 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4096  // Make sure the type of the entry is already created. This is the type we
4097  // have to create:
4098  // struct __tgt_offload_entry{
4099  // void *addr; // Pointer to the offload entry info.
4100  // // (function or global)
4101  // char *name; // Name of the function or global.
4102  // size_t size; // Size of the entry info (0 if it is a function).
4103  // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4104  // int32_t reserved; // Reserved, to use by the runtime library.
4105  // };
4106  if (TgtOffloadEntryQTy.isNull()) {
4107  ASTContext &C = CGM.getContext();
4108  RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4109  RD->startDefinition();
4110  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4111  addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
4112  addFieldToRecordDecl(C, RD, C.getSizeType());
4113  addFieldToRecordDecl(
4114  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4115  addFieldToRecordDecl(
4116  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4117  RD->completeDefinition();
4118  RD->addAttr(PackedAttr::CreateImplicit(C));
4119  TgtOffloadEntryQTy = C.getRecordType(RD);
4120  }
4121  return TgtOffloadEntryQTy;
4122 }
4123 
4124 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
4125  // These are the types we need to build:
4126  // struct __tgt_device_image{
4127  // void *ImageStart; // Pointer to the target code start.
4128  // void *ImageEnd; // Pointer to the target code end.
4129  // // We also add the host entries to the device image, as it may be useful
4130  // // for the target runtime to have access to that information.
4131  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
4132  // // the entries.
4133  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4134  // // entries (non inclusive).
4135  // };
4136  if (TgtDeviceImageQTy.isNull()) {
4137  ASTContext &C = CGM.getContext();
4138  RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4139  RD->startDefinition();
4140  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4141  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4142  addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4143  addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4144  RD->completeDefinition();
4145  TgtDeviceImageQTy = C.getRecordType(RD);
4146  }
4147  return TgtDeviceImageQTy;
4148 }
4149 
4150 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
4151  // struct __tgt_bin_desc{
4152  // int32_t NumDevices; // Number of devices supported.
4153  // __tgt_device_image *DeviceImages; // Arrays of device images
4154  // // (one per device).
4155  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
4156  // // entries.
4157  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4158  // // entries (non inclusive).
4159  // };
4160  if (TgtBinaryDescriptorQTy.isNull()) {
4161  ASTContext &C = CGM.getContext();
4162  RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4163  RD->startDefinition();
4164  addFieldToRecordDecl(
4165  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4166  addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
4167  addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4168  addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
4169  RD->completeDefinition();
4170  TgtBinaryDescriptorQTy = C.getRecordType(RD);
4171  }
4172  return TgtBinaryDescriptorQTy;
4173 }
4174 
4175 namespace {
4176 struct PrivateHelpersTy {
4177  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4178  const VarDecl *PrivateElemInit)
4179  : Original(Original), PrivateCopy(PrivateCopy),
4180  PrivateElemInit(PrivateElemInit) {}
4181  const VarDecl *Original;
4182  const VarDecl *PrivateCopy;
4183  const VarDecl *PrivateElemInit;
4184 };
4185 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4186 } // anonymous namespace
4187 
4188 static RecordDecl *
4189 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
4190  if (!Privates.empty()) {
4191  ASTContext &C = CGM.getContext();
4192  // Build struct .kmp_privates_t. {
4193  // /* private vars */
4194  // };
4195  RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4196  RD->startDefinition();
4197  for (const auto &Pair : Privates) {
4198  const VarDecl *VD = Pair.second.Original;
4199  QualType Type = VD->getType().getNonReferenceType();
4200  FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4201  if (VD->hasAttrs()) {
4202  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4203  E(VD->getAttrs().end());
4204  I != E; ++I)
4205  FD->addAttr(*I);
4206  }
4207  }
4208  RD->completeDefinition();
4209  return RD;
4210  }
4211  return nullptr;
4212 }
4213 
4214 static RecordDecl *
4215 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
4216  QualType KmpInt32Ty,
4217  QualType KmpRoutineEntryPointerQTy) {
4218  ASTContext &C = CGM.getContext();
4219  // Build struct kmp_task_t {
4220  // void * shareds;
4221  // kmp_routine_entry_t routine;
4222  // kmp_int32 part_id;
4223  // kmp_cmplrdata_t data1;
4224  // kmp_cmplrdata_t data2;
4225  // For taskloops additional fields:
4226  // kmp_uint64 lb;
4227  // kmp_uint64 ub;
4228  // kmp_int64 st;
4229  // kmp_int32 liter;
4230  // void * reductions;
4231  // };
4232  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4233  UD->startDefinition();
4234  addFieldToRecordDecl(C, UD, KmpInt32Ty);
4235  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4236  UD->completeDefinition();
4237  QualType KmpCmplrdataTy = C.getRecordType(UD);
4238  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4239  RD->startDefinition();
4240  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4241  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4242  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4243  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4244  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4245  if (isOpenMPTaskLoopDirective(Kind)) {
4246  QualType KmpUInt64Ty =
4247  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4248  QualType KmpInt64Ty =
4249  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4250  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4251  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4252  addFieldToRecordDecl(C, RD, KmpInt64Ty);
4253  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4254  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4255  }
4256  RD->completeDefinition();
4257  return RD;
4258 }
4259 
4260 static RecordDecl *
4261 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
4262  ArrayRef<PrivateDataTy> Privates) {
4263  ASTContext &C = CGM.getContext();
4264  // Build struct kmp_task_t_with_privates {
4265  // kmp_task_t task_data;
4266  // .kmp_privates_t. privates;
4267  // };
4268  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4269  RD->startDefinition();
4270  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4271  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4272  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4273  RD->completeDefinition();
4274  return RD;
4275 }
4276 
4277 /// Emit a proxy function which accepts kmp_task_t as the second
4278 /// argument.
4279 /// \code
4280 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4281 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4282 /// For taskloops:
4283 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4284 /// tt->reductions, tt->shareds);
4285 /// return 0;
4286 /// }
4287 /// \endcode
4288 static llvm::Value *
4289 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
4290  OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4291  QualType KmpTaskTWithPrivatesPtrQTy,
4292  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4293  QualType SharedsPtrTy, llvm::Value *TaskFunction,
4294  llvm::Value *TaskPrivatesMap) {
4295  ASTContext &C = CGM.getContext();
4296  FunctionArgList Args;
4297  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4298  ImplicitParamDecl::Other);
4299  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4300  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4301  ImplicitParamDecl::Other);
4302  Args.push_back(&GtidArg);
4303  Args.push_back(&TaskTypeArg);
4304  const auto &TaskEntryFnInfo =
4305  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4306  llvm::FunctionType *TaskEntryTy =
4307  CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4308  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4309  auto *TaskEntry = llvm::Function::Create(
4310  TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4311  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4312  TaskEntry->setDoesNotRecurse();
4313  CodeGenFunction CGF(CGM);
4314  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4315  Loc, Loc);
4316 
4317  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4318  // tt,
4319  // For taskloops:
4320  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4321  // tt->task_data.shareds);
4322  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4323  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4324  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4325  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4326  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4327  const auto *KmpTaskTWithPrivatesQTyRD =
4328  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4329  LValue Base =
4330  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4331  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4332  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4333  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4334  llvm::Value *PartidParam = PartIdLVal.getPointer();
4335 
4336  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4337  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4338  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4339  CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4340  CGF.ConvertTypeForMem(SharedsPtrTy));
4341 
4342  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4343  llvm::Value *PrivatesParam;
4344  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4345  LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4346  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4347  PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4348  } else {
4349  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4350  }
4351 
4352  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4353  TaskPrivatesMap,
4354  CGF.Builder
4355  .CreatePointerBitCastOrAddrSpaceCast(
4356  TDBase.getAddress(), CGF.VoidPtrTy)
4357  .getPointer()};
4358  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4359  std::end(CommonArgs));
4360  if (isOpenMPTaskLoopDirective(Kind)) {
4361  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4362  LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4363  llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4364  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4365  LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4366  llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4367  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4368  LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4369  llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4370  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4371  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4372  llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4373  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4374  LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4375  llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4376  CallArgs.push_back(LBParam);
4377  CallArgs.push_back(UBParam);
4378  CallArgs.push_back(StParam);
4379  CallArgs.push_back(LIParam);
4380  CallArgs.push_back(RParam);
4381  }
4382  CallArgs.push_back(SharedsParam);
4383 
4384  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4385  CallArgs);
4386  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4387  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4388  CGF.FinishFunction();
4389  return TaskEntry;
4390 }
4391 
4392 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
4393  SourceLocation Loc,
4394  QualType KmpInt32Ty,
4395  QualType KmpTaskTWithPrivatesPtrQTy,
4396  QualType KmpTaskTWithPrivatesQTy) {
4397  ASTContext &C = CGM.getContext();
4398  FunctionArgList Args;
4399  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4400  ImplicitParamDecl::Other);
4401  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4402  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4403  ImplicitParamDecl::Other);
4404  Args.push_back(&GtidArg);
4405  Args.push_back(&TaskTypeArg);
4406  const auto &DestructorFnInfo =
4407  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4408  llvm::FunctionType *DestructorFnTy =
4409  CGM.getTypes().GetFunctionType(DestructorFnInfo);
4410  std::string Name =
4411  CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4412  auto *DestructorFn =
4413  llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
4414  Name, &CGM.getModule());
4415  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4416  DestructorFnInfo);
4417  DestructorFn->setDoesNotRecurse();
4418  CodeGenFunction CGF(CGM);
4419  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4420  Args, Loc, Loc);
4421 
4422  LValue Base = CGF.EmitLoadOfPointerLValue(
4423  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4424  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4425  const auto *KmpTaskTWithPrivatesQTyRD =
4426  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4427  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4428  Base = CGF.EmitLValueForField(Base, *FI);
4429  for (const auto *Field :
4430  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4431  if (QualType::DestructionKind DtorKind =
4432  Field->getType().isDestructedType()) {
4433  LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4434  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4435  }
4436  }
4437  CGF.FinishFunction();
4438  return DestructorFn;
4439 }
4440 
4441 /// Emit a privates mapping function for correct handling of private and
4442 /// firstprivate variables.
4443 /// \code
4444 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4445 /// **noalias priv1,..., <tyn> **noalias privn) {
4446 /// *priv1 = &.privates.priv1;
4447 /// ...;
4448 /// *privn = &.privates.privn;
4449 /// }
4450 /// \endcode
4451 static llvm::Value *
4453  ArrayRef<const Expr *> PrivateVars,
4454  ArrayRef<const Expr *> FirstprivateVars,
4455  ArrayRef<const Expr *> LastprivateVars,
4456  QualType PrivatesQTy,
4457  ArrayRef<PrivateDataTy> Privates) {
4458  ASTContext &C = CGM.getContext();
4459  FunctionArgList Args;
4460  ImplicitParamDecl TaskPrivatesArg(
4461  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4462  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4463  ImplicitParamDecl::Other);
4464  Args.push_back(&TaskPrivatesArg);
4465  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4466  unsigned Counter = 1;
4467  for (const Expr *E : PrivateVars) {
4468  Args.push_back(ImplicitParamDecl::Create(
4469  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4470  C.getPointerType(C.getPointerType(E->getType()))
4471  .withConst()
4472  .withRestrict(),
4473  ImplicitParamDecl::Other));
4474  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4475  PrivateVarsPos[VD] = Counter;
4476  ++Counter;
4477  }
4478  for (const Expr *E : FirstprivateVars) {
4479  Args.push_back(ImplicitParamDecl::Create(
4480  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4481  C.getPointerType(C.getPointerType(E->getType()))
4482  .withConst()
4483  .withRestrict(),
4484  ImplicitParamDecl::Other));
4485  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4486  PrivateVarsPos[VD] = Counter;
4487  ++Counter;
4488  }
4489  for (const Expr *E : LastprivateVars) {
4490  Args.push_back(ImplicitParamDecl::Create(
4491  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4492  C.getPointerType(C.getPointerType(E->getType()))
4493  .withConst()
4494  .withRestrict(),
4495  ImplicitParamDecl::Other));
4496  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4497  PrivateVarsPos[VD] = Counter;
4498  ++Counter;
4499  }
4500  const auto &TaskPrivatesMapFnInfo =
4501  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4502  llvm::FunctionType *TaskPrivatesMapTy =
4503  CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4504  std::string Name =
4505  CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4506  auto *TaskPrivatesMap = llvm::Function::Create(
4507  TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4508  &CGM.getModule());
4509  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4510  TaskPrivatesMapFnInfo);
4511  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4512  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4513  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4514  CodeGenFunction CGF(CGM);
4515  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4516  TaskPrivatesMapFnInfo, Args, Loc, Loc);
4517 
4518  // *privi = &.privates.privi;
4519  LValue Base = CGF.EmitLoadOfPointerLValue(
4520  CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4521  TaskPrivatesArg.getType()->castAs<PointerType>());
4522  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4523  Counter = 0;
4524  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4525  LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4526  const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4527  LValue RefLVal =
4528  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4529  LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4530  RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4531  CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4532  ++Counter;
4533  }
4534  CGF.FinishFunction();
4535  return TaskPrivatesMap;
4536 }
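// Seen from the call site, the map function built above simply publishes the
// addresses of the fields of the .privates. record, e.g. (hypothetical
// variable names and types, two privates assumed):
// \code
// int *p1; double *p2;
// .omp_task_privates_map.(&tt->privates, &p1, &p2);
// // p1 and p2 now point into the task's private data block and are used by
// // the outlined task body in place of the original variables.
// \endcode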
4537 
4538 static bool stable_sort_comparator(const PrivateDataTy P1,
4539  const PrivateDataTy P2) {
4540  return P1.first > P2.first;
4541 }
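// stable_sort_comparator orders PrivateDataTy entries by their .first key in
// descending order. Callers later in this file apply it via std::stable_sort,
// so entries with equal keys keep their original relative order, roughly:
// \code
// std::stable_sort(Privates.begin(), Privates.end(), stable_sort_comparator);
// \endcode
// (The .first key holds the alignment recorded when the Privates list is
// built, so the most strictly aligned copies come first in the .privates.
// record.)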
4542 
4543 /// Emit initialization for private variables in task-based directives.
4544 static void emitPrivatesInit(CodeGenFunction &CGF,
4545  const OMPExecutableDirective &D,
4546  Address KmpTaskSharedsPtr, LValue TDBase,
4547  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4548  QualType SharedsTy, QualType SharedsPtrTy,
4549  const OMPTaskDataTy &Data,
4550  ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4551  ASTContext &C = CGF.getContext();
4552  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4553  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4554  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4555  ? OMPD_taskloop
4556  : OMPD_task;
4557  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4558  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4559  LValue SrcBase;
4560  bool IsTargetTask =
4561  isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4562  isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4563  // For target-based directives, skip the 3 firstprivate arrays BasePointersArray,
4564  // PointersArray and SizesArray; the original variables for these arrays are
4565  // not captured, and their addresses are obtained explicitly.
4566  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4567  (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4568  SrcBase = CGF.MakeAddrLValue(
4569  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4570  KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4571  SharedsTy);
4572  }
4573  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4574  for (const PrivateDataTy &Pair : Privates) {
4575  const VarDecl *VD = Pair.second.PrivateCopy;
4576  const Expr *Init = VD->getAnyInitializer();
4577  if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4578  !CGF.isTrivialInitializer(Init)))) {
4579  LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4580  if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4581  const VarDecl *OriginalVD = Pair.second.Original;
4582  // Check if the variable is the target-based BasePointersArray,
4583  // PointersArray or SizesArray.
4584  LValue SharedRefLValue;
4585  QualType Type = OriginalVD->getType();
4586  const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4587  if (IsTargetTask && !SharedField) {
4588  assert(isa<ImplicitParamDecl>(OriginalVD) &&
4589  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4590  cast<CapturedDecl>(OriginalVD->getDeclContext())
4591  ->getNumParams() == 0 &&
4592  isa<TranslationUnitDecl>(
4593  cast<CapturedDecl>(OriginalVD->getDeclContext())
4594  ->getDeclContext()) &&
4595  "Expected artificial target data variable.");
4596  SharedRefLValue =
4597  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4598  } else {
4599  SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4600  SharedRefLValue = CGF.MakeAddrLValue(
4601  Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4602  SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4603  SharedRefLValue.getTBAAInfo());
4604  }
4605  if (Type->isArrayType()) {
4606  // Initialize firstprivate array.
4607  if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4608  // Perform simple memcpy.
4609  CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4610  } else {
4611  // Initialize firstprivate array using element-by-element
4612  // initialization.
4613  CGF.EmitOMPAggregateAssign(
4614  PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4615  [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4616  Address SrcElement) {
4617  // Clean up any temporaries needed by the initialization.
4618  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4619  InitScope.addPrivate(
4620  Elem, [SrcElement]() -> Address { return SrcElement; });
4621  (void)InitScope.Privatize();
4622  // Emit initialization for single element.
4623  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4624  CGF, &CapturesInfo);
4625  CGF.EmitAnyExprToMem(Init, DestElement,
4626  Init->getType().getQualifiers(),
4627  /*IsInitializer=*/false);
4628  });
4629  }
4630  } else {
4631  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4632  InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4633  return SharedRefLValue.getAddress();
4634  });
4635  (void)InitScope.Privatize();
4636  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4637  CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4638  /*capturedByInit=*/false);
4639  }
4640  } else {
4641  CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4642  }
4643  }
4644  ++FI;
4645  }
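  // In terms of the code this loop emits, each private copy with an
  // initializer ends up, roughly, as one of the following (pseudo-code,
  // illustrative names):
  // \code
  // // firstprivate array with trivial copy: plain aggregate copy
  // memcpy(&privates.a, &shareds->a, sizeof(privates.a));
  // // firstprivate with non-trivial copy: per-element / scalar copy-init
  // new (&privates.b) T(shareds->b);
  // // plain private: default initialization only
  // new (&privates.c) T();
  // \endcode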