clang  7.0.0svn
CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CGRecordLayout.h"
18 #include "CodeGenFunction.h"
19 #include "clang/CodeGen/ConstantInitBuilder.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/AST/StmtOpenMP.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/BitmaskEnum.h"
24 #include "llvm/Bitcode/BitcodeReader.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalValue.h"
28 #include "llvm/IR/Value.h"
29 #include "llvm/Support/Format.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <cassert>
32 
33 using namespace clang;
34 using namespace CodeGen;
35 
36 namespace {
37 /// Base class for handling code generation inside OpenMP regions.
38 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
39 public:
40  /// Kinds of OpenMP regions used in codegen.
41  enum CGOpenMPRegionKind {
42  /// Region with outlined function for standalone 'parallel'
43  /// directive.
44  ParallelOutlinedRegion,
45  /// Region with outlined function for standalone 'task' directive.
46  TaskOutlinedRegion,
47  /// Region for constructs that do not require function outlining,
48  /// like 'for', 'sections', 'atomic' etc. directives.
49  InlinedRegion,
50  /// Region with outlined function for standalone 'target' directive.
51  TargetRegion,
52  };
53 
54  CGOpenMPRegionInfo(const CapturedStmt &CS,
55  const CGOpenMPRegionKind RegionKind,
56  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
57  bool HasCancel)
58  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
59  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
60 
61  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
62  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
63  bool HasCancel)
64  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
65  Kind(Kind), HasCancel(HasCancel) {}
66 
67  /// Get a variable or parameter for storing global thread id
68  /// inside OpenMP construct.
69  virtual const VarDecl *getThreadIDVariable() const = 0;
70 
71  /// Emit the captured statement body.
72  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
73 
74  /// Get an LValue for the current ThreadID variable.
75  /// \return LValue for thread id variable. This LValue always has type int32*.
76  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
77 
78  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
79 
80  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
81 
82  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
83 
84  bool hasCancel() const { return HasCancel; }
85 
86  static bool classof(const CGCapturedStmtInfo *Info) {
87  return Info->getKind() == CR_OpenMP;
88  }
89 
90  ~CGOpenMPRegionInfo() override = default;
91 
92 protected:
93  CGOpenMPRegionKind RegionKind;
94  RegionCodeGenTy CodeGen;
95  OpenMPDirectiveKind Kind;
96  bool HasCancel;
97 };
98 
99 /// API for captured statement code generation in OpenMP constructs.
100 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
101 public:
102  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
103  const RegionCodeGenTy &CodeGen,
104  OpenMPDirectiveKind Kind, bool HasCancel,
105  StringRef HelperName)
106  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
107  HasCancel),
108  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
109  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
110  }
111 
112  /// Get a variable or parameter for storing global thread id
113  /// inside OpenMP construct.
114  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
115 
116  /// Get the name of the capture helper.
117  StringRef getHelperName() const override { return HelperName; }
118 
119  static bool classof(const CGCapturedStmtInfo *Info) {
120  return CGOpenMPRegionInfo::classof(Info) &&
121  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
122  ParallelOutlinedRegion;
123  }
124 
125 private:
126  /// A variable or parameter storing global thread id for OpenMP
127  /// constructs.
128  const VarDecl *ThreadIDVar;
129  StringRef HelperName;
130 };
131 
132 /// API for captured statement code generation in OpenMP constructs.
133 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
134 public:
135  class UntiedTaskActionTy final : public PrePostActionTy {
136  bool Untied;
137  const VarDecl *PartIDVar;
138  const RegionCodeGenTy UntiedCodeGen;
139  llvm::SwitchInst *UntiedSwitch = nullptr;
140 
141  public:
142  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
143  const RegionCodeGenTy &UntiedCodeGen)
144  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
145  void Enter(CodeGenFunction &CGF) override {
146  if (Untied) {
147  // Emit task switching point.
148  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
149  CGF.GetAddrOfLocalVar(PartIDVar),
150  PartIDVar->getType()->castAs<PointerType>());
151  llvm::Value *Res =
152  CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
153  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
154  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
155  CGF.EmitBlock(DoneBB);
156  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
157  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
158  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
159  CGF.Builder.GetInsertBlock());
160  emitUntiedSwitch(CGF);
161  }
162  }
163  void emitUntiedSwitch(CodeGenFunction &CGF) const {
164  if (Untied) {
165  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
166  CGF.GetAddrOfLocalVar(PartIDVar),
167  PartIDVar->getType()->castAs<PointerType>());
168  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
169  PartIdLVal);
170  UntiedCodeGen(CGF);
171  CodeGenFunction::JumpDest CurPoint =
172  CGF.getJumpDestInCurrentScope(".untied.next.");
173  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
174  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
175  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
176  CGF.Builder.GetInsertBlock());
177  CGF.EmitBranchThroughCleanup(CurPoint);
178  CGF.EmitBlock(CurPoint.getBlock());
179  }
180  }
181  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
182  };
183  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
184  const VarDecl *ThreadIDVar,
185  const RegionCodeGenTy &CodeGen,
186  OpenMPDirectiveKind Kind, bool HasCancel,
187  const UntiedTaskActionTy &Action)
188  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
189  ThreadIDVar(ThreadIDVar), Action(Action) {
190  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
191  }
192 
193  /// Get a variable or parameter for storing global thread id
194  /// inside OpenMP construct.
195  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
196 
197  /// Get an LValue for the current ThreadID variable.
198  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
199 
200  /// Get the name of the capture helper.
201  StringRef getHelperName() const override { return ".omp_outlined."; }
202 
203  void emitUntiedSwitch(CodeGenFunction &CGF) override {
204  Action.emitUntiedSwitch(CGF);
205  }
206 
207  static bool classof(const CGCapturedStmtInfo *Info) {
208  return CGOpenMPRegionInfo::classof(Info) &&
209  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
210  TaskOutlinedRegion;
211  }
212 
213 private:
214  /// A variable or parameter storing global thread id for OpenMP
215  /// constructs.
216  const VarDecl *ThreadIDVar;
217  /// Action for emitting code for untied tasks.
218  const UntiedTaskActionTy &Action;
219 };
220 
221 /// API for inlined captured statement code generation in OpenMP
222 /// constructs.
223 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
224 public:
225  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
226  const RegionCodeGenTy &CodeGen,
227  OpenMPDirectiveKind Kind, bool HasCancel)
228  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
229  OldCSI(OldCSI),
230  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
231 
232  // Retrieve the value of the context parameter.
233  llvm::Value *getContextValue() const override {
234  if (OuterRegionInfo)
235  return OuterRegionInfo->getContextValue();
236  llvm_unreachable("No context value for inlined OpenMP region");
237  }
238 
239  void setContextValue(llvm::Value *V) override {
240  if (OuterRegionInfo) {
241  OuterRegionInfo->setContextValue(V);
242  return;
243  }
244  llvm_unreachable("No context value for inlined OpenMP region");
245  }
246 
247  /// Lookup the captured field decl for a variable.
248  const FieldDecl *lookup(const VarDecl *VD) const override {
249  if (OuterRegionInfo)
250  return OuterRegionInfo->lookup(VD);
251  // If there is no outer outlined region, there is no need to look up the
252  // variable in a list of captured variables; we can use the original one.
253  return nullptr;
254  }
255 
256  FieldDecl *getThisFieldDecl() const override {
257  if (OuterRegionInfo)
258  return OuterRegionInfo->getThisFieldDecl();
259  return nullptr;
260  }
261 
262  /// Get a variable or parameter for storing global thread id
263  /// inside OpenMP construct.
264  const VarDecl *getThreadIDVariable() const override {
265  if (OuterRegionInfo)
266  return OuterRegionInfo->getThreadIDVariable();
267  return nullptr;
268  }
269 
270  /// Get an LValue for the current ThreadID variable.
271  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
272  if (OuterRegionInfo)
273  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
274  llvm_unreachable("No LValue for inlined OpenMP construct");
275  }
276 
277  /// Get the name of the capture helper.
278  StringRef getHelperName() const override {
279  if (auto *OuterRegionInfo = getOldCSI())
280  return OuterRegionInfo->getHelperName();
281  llvm_unreachable("No helper name for inlined OpenMP construct");
282  }
283 
284  void emitUntiedSwitch(CodeGenFunction &CGF) override {
285  if (OuterRegionInfo)
286  OuterRegionInfo->emitUntiedSwitch(CGF);
287  }
288 
289  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
290 
291  static bool classof(const CGCapturedStmtInfo *Info) {
292  return CGOpenMPRegionInfo::classof(Info) &&
293  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
294  }
295 
296  ~CGOpenMPInlinedRegionInfo() override = default;
297 
298 private:
299  /// CodeGen info about outer OpenMP region.
300  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
301  CGOpenMPRegionInfo *OuterRegionInfo;
302 };
303 
304 /// API for captured statement code generation in OpenMP target
305 /// constructs. For these captures, implicit parameters are used instead of the
306 /// captured fields. The name of the target region has to be unique in a given
307 /// application so it is provided by the client, because only the client has
308 /// the information to generate that.
309 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
310 public:
311  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
312  const RegionCodeGenTy &CodeGen, StringRef HelperName)
313  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
314  /*HasCancel=*/false),
315  HelperName(HelperName) {}
316 
317  /// This is unused for target regions because each starts executing
318  /// with a single thread.
319  const VarDecl *getThreadIDVariable() const override { return nullptr; }
320 
321  /// Get the name of the capture helper.
322  StringRef getHelperName() const override { return HelperName; }
323 
324  static bool classof(const CGCapturedStmtInfo *Info) {
325  return CGOpenMPRegionInfo::classof(Info) &&
326  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
327  }
328 
329 private:
330  StringRef HelperName;
331 };
332 
333 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
334  llvm_unreachable("No codegen for expressions");
335 }
336 /// API for generation of expressions captured in an innermost OpenMP
337 /// region.
338 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
339 public:
340  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
341  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
342  OMPD_unknown,
343  /*HasCancel=*/false),
344  PrivScope(CGF) {
345  // Make sure the globals captured in the provided statement are local by
346  // using the privatization logic. We assume the same variable is not
347  // captured more than once.
348  for (const auto &C : CS.captures()) {
349  if (!C.capturesVariable() && !C.capturesVariableByCopy())
350  continue;
351 
352  const VarDecl *VD = C.getCapturedVar();
353  if (VD->isLocalVarDeclOrParm())
354  continue;
355 
356  DeclRefExpr DRE(const_cast<VarDecl *>(VD),
357  /*RefersToEnclosingVariableOrCapture=*/false,
358  VD->getType().getNonReferenceType(), VK_LValue,
359  C.getLocation());
360  PrivScope.addPrivate(
361  VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
362  }
363  (void)PrivScope.Privatize();
364  }
365 
366  /// Lookup the captured field decl for a variable.
367  const FieldDecl *lookup(const VarDecl *VD) const override {
368  if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
369  return FD;
370  return nullptr;
371  }
372 
373  /// Emit the captured statement body.
374  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
375  llvm_unreachable("No body for expressions");
376  }
377 
378  /// Get a variable or parameter for storing global thread id
379  /// inside OpenMP construct.
380  const VarDecl *getThreadIDVariable() const override {
381  llvm_unreachable("No thread id for expressions");
382  }
383 
384  /// Get the name of the capture helper.
385  StringRef getHelperName() const override {
386  llvm_unreachable("No helper name for expressions");
387  }
388 
389  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
390 
391 private:
392  /// Private scope to capture global variables.
393  CodeGenFunction::OMPPrivateScope PrivScope;
394 };
395 
396 /// RAII for emitting code of OpenMP constructs.
397 class InlinedOpenMPRegionRAII {
398  CodeGenFunction &CGF;
399  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
400  FieldDecl *LambdaThisCaptureField = nullptr;
401  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
402 
403 public:
404  /// Constructs region for combined constructs.
405  /// \param CodeGen Code generation sequence for combined directives. Includes
406  /// a list of functions used for code generation of implicitly inlined
407  /// regions.
408  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
409  OpenMPDirectiveKind Kind, bool HasCancel)
410  : CGF(CGF) {
411  // Start emission for the construct.
412  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
413  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
414  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
415  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
416  CGF.LambdaThisCaptureField = nullptr;
417  BlockInfo = CGF.BlockInfo;
418  CGF.BlockInfo = nullptr;
419  }
420 
421  ~InlinedOpenMPRegionRAII() {
422  // Restore original CapturedStmtInfo only if we're done with code emission.
423  auto *OldCSI =
424  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
425  delete CGF.CapturedStmtInfo;
426  CGF.CapturedStmtInfo = OldCSI;
427  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
428  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
429  CGF.BlockInfo = BlockInfo;
430  }
431 };
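// Illustrative usage sketch (not from this file, names are hypothetical):
// callers that emit an inlined construct wrap body emission in this RAII, e.g.
//   {
//     InlinedOpenMPRegionRAII Region(CGF, SomeCodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     SomeCodeGen(CGF);
//   } // CapturedStmtInfo, lambda capture fields and block info restored here.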
432 
433 /// Values for bit flags used in the ident_t to describe the fields.
434 /// All enum elements are named and described in accordance with the code
435 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
436 enum OpenMPLocationFlags : unsigned {
437  /// Use trampoline for internal microtask.
438  OMP_IDENT_IMD = 0x01,
439  /// Use c-style ident structure.
440  OMP_IDENT_KMPC = 0x02,
441  /// Atomic reduction option for kmpc_reduce.
442  OMP_ATOMIC_REDUCE = 0x10,
443  /// Explicit 'barrier' directive.
444  OMP_IDENT_BARRIER_EXPL = 0x20,
445  /// Implicit barrier in code.
446  OMP_IDENT_BARRIER_IMPL = 0x40,
447  /// Implicit barrier in 'for' directive.
448  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
449  /// Implicit barrier in 'sections' directive.
450  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
451  /// Implicit barrier in 'single' directive.
452  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
453  /// Call of __kmp_for_static_init for static loop.
454  OMP_IDENT_WORK_LOOP = 0x200,
455  /// Call of __kmp_for_static_init for sections.
456  OMP_IDENT_WORK_SECTIONS = 0x400,
457  /// Call of __kmp_for_static_init for distribute.
458  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
459  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
460 };
461 
462 /// Describes ident structure that describes a source location.
463 /// All descriptions are taken from
464 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
465 /// Original structure:
466 /// typedef struct ident {
467 /// kmp_int32 reserved_1; /**< might be used in Fortran;
468 /// see above */
469 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
470 /// KMP_IDENT_KMPC identifies this union
471 /// member */
472 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
473 /// see above */
474 ///#if USE_ITT_BUILD
475 /// /* but currently used for storing
476 /// region-specific ITT */
477 /// /* contextual information. */
478 ///#endif /* USE_ITT_BUILD */
479 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
480 /// C++ */
481 /// char const *psource; /**< String describing the source location.
482 /// The string is composed of semi-colon separated
483 /// fields which describe the source file,
484 /// the function and a pair of line numbers that
485 /// delimit the construct.
486 /// */
487 /// } ident_t;
488 enum IdentFieldIndex {
489  /// might be used in Fortran
490  IdentField_Reserved_1,
491  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
492  IdentField_Flags,
493  /// Not really used in Fortran any more
494  IdentField_Reserved_2,
495  /// Source[4] in Fortran, do not use for C++
496  IdentField_Reserved_3,
497  /// String describing the source location. The string is composed of
498  /// semi-colon separated fields which describe the source file, the function
499  /// and a pair of line numbers that delimit the construct.
500  IdentField_PSource
501 };
502 
503 /// Schedule types for 'omp for' loops (these enumerators are taken from
504 /// the enum sched_type in kmp.h).
506  /// Lower bound for default (unordered) versions.
514  /// static with chunk adjustment (e.g., simd)
516  /// Lower bound for 'ordered' versions.
525  /// dist_schedule types
528  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
529  /// Set if the monotonic schedule modifier was present.
531  /// Set if the nonmonotonic schedule modifier was present.
533 };
534 
536  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
537  /// kmpc_micro microtask, ...);
539  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
540  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
542  /// Call to void __kmpc_threadprivate_register( ident_t *,
543  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
545  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
547  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
548  // kmp_critical_name *crit);
550  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
551  // global_tid, kmp_critical_name *crit, uintptr_t hint);
553  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
554  // kmp_critical_name *crit);
556  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
557  // global_tid);
559  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
561  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
563  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
564  // global_tid);
566  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
567  // global_tid);
569  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
570  // kmp_int32 num_threads);
572  // Call to void __kmpc_flush(ident_t *loc);
574  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
576  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
578  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
579  // int end_part);
581  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
583  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
585  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
586  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
587  // kmp_routine_entry_t *task_entry);
589  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
590  // new_task);
592  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
593  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
594  // kmp_int32 didit);
596  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
597  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
598  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
600  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
601  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
602  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
603  // *lck);
605  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
606  // kmp_critical_name *lck);
608  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
609  // kmp_critical_name *lck);
611  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
612  // kmp_task_t * new_task);
614  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
615  // kmp_task_t * new_task);
617  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
619  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
621  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
622  // global_tid);
624  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
626  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
628  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
629  // int proc_bind);
631  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
632  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
633  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
635  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
636  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
637  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
639  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
640  // global_tid, kmp_int32 cncl_kind);
642  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
643  // kmp_int32 cncl_kind);
645  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
646  // kmp_int32 num_teams, kmp_int32 thread_limit);
648  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
649  // microtask, ...);
651  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
652  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
653  // sched, kmp_uint64 grainsize, void *task_dup);
655  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
656  // num_dims, struct kmp_dim *dims);
658  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
660  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
661  // *vec);
663  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
664  // *vec);
666  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
667  // *data);
669  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
670  // *d);
672 
673  //
674  // Offloading related calls
675  //
676  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
677  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
678  // *arg_types);
680  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
681  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
682  // *arg_types);
684  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
685  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
686  // *arg_types, int32_t num_teams, int32_t thread_limit);
688  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
689  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
690  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
692  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
694  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
696  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
697  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
699  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
700  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
701  // *arg_types);
703  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
704  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
706  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
707  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
708  // *arg_types);
710  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
711  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
713  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
714  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
715  // *arg_types);
717 };
718 
719 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
720 /// region.
721 class CleanupTy final : public EHScopeStack::Cleanup {
722  PrePostActionTy *Action;
723 
724 public:
725  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
726  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
727  if (!CGF.HaveInsertPoint())
728  return;
729  Action->Exit(CGF);
730  }
731 };
732 
733 } // anonymous namespace
734 
735 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
736  CodeGenFunction::RunCleanupsScope Scope(CGF);
737  if (PrePostAction) {
738  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
739  Callback(CodeGen, CGF, *PrePostAction);
740  } else {
741  PrePostActionTy Action;
742  Callback(CodeGen, CGF, Action);
743  }
744 }
745 
746 /// Check if the combiner is a call to a UDR combiner and, if so, return the
747 /// UDR decl used for reduction.
748 static const OMPDeclareReductionDecl *
749 getReductionInit(const Expr *ReductionOp) {
750  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
751  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
752  if (const auto *DRE =
753  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
754  if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
755  return DRD;
756  return nullptr;
757 }
758 
759 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
760  const OMPDeclareReductionDecl *DRD,
761  const Expr *InitOp,
762  Address Private, Address Original,
763  QualType Ty) {
764  if (DRD->getInitializer()) {
765  std::pair<llvm::Function *, llvm::Function *> Reduction =
766  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
767  const auto *CE = cast<CallExpr>(InitOp);
768  const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
769  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
770  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
771  const auto *LHSDRE =
772  cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
773  const auto *RHSDRE =
774  cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
775  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
776  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
777  [=]() { return Private; });
778  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
779  [=]() { return Original; });
780  (void)PrivateScope.Privatize();
781  RValue Func = RValue::get(Reduction.second);
782  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
783  CGF.EmitIgnoredExpr(InitOp);
784  } else {
785  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
786  std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
787  auto *GV = new llvm::GlobalVariable(
788  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
789  llvm::GlobalValue::PrivateLinkage, Init, Name);
790  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
791  RValue InitRVal;
792  switch (CGF.getEvaluationKind(Ty)) {
793  case TEK_Scalar:
794  InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
795  break;
796  case TEK_Complex:
797  InitRVal =
798  RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
799  break;
800  case TEK_Aggregate:
801  InitRVal = RValue::getAggregate(LV.getAddress());
802  break;
803  }
804  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
805  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
806  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
807  /*IsInitializer=*/false);
808  }
809 }
810 
811 /// Emit initialization of arrays of complex types.
812 /// \param DestAddr Address of the array.
813 /// \param Type Type of array.
814 /// \param Init Initial expression of array.
815 /// \param SrcAddr Address of the original array.
816 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
817  QualType Type, bool EmitDeclareReductionInit,
818  const Expr *Init,
819  const OMPDeclareReductionDecl *DRD,
820  Address SrcAddr = Address::invalid()) {
821  // Perform element-by-element initialization.
822  QualType ElementTy;
823 
824  // Drill down to the base element type on both arrays.
825  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
826  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
827  DestAddr =
828  CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
829  if (DRD)
830  SrcAddr =
831  CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
832 
833  llvm::Value *SrcBegin = nullptr;
834  if (DRD)
835  SrcBegin = SrcAddr.getPointer();
836  llvm::Value *DestBegin = DestAddr.getPointer();
837  // Cast from pointer to array type to pointer to single element.
838  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
839  // The basic structure here is a while-do loop.
840  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
841  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
842  llvm::Value *IsEmpty =
843  CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
844  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
845 
846  // Enter the loop body, making that address the current address.
847  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
848  CGF.EmitBlock(BodyBB);
849 
850  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
851 
852  llvm::PHINode *SrcElementPHI = nullptr;
853  Address SrcElementCurrent = Address::invalid();
854  if (DRD) {
855  SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
856  "omp.arraycpy.srcElementPast");
857  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
858  SrcElementCurrent =
859  Address(SrcElementPHI,
860  SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
861  }
862  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
863  DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
864  DestElementPHI->addIncoming(DestBegin, EntryBB);
865  Address DestElementCurrent =
866  Address(DestElementPHI,
867  DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
868 
869  // Emit copy.
870  {
871  CodeGenFunction::RunCleanupsScope InitScope(CGF);
872  if (EmitDeclareReductionInit) {
873  emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
874  SrcElementCurrent, ElementTy);
875  } else
876  CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
877  /*IsInitializer=*/false);
878  }
879 
880  if (DRD) {
881  // Shift the address forward by one element.
882  llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
883  SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
884  SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
885  }
886 
887  // Shift the address forward by one element.
888  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
889  DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
890  // Check whether we've reached the end.
891  llvm::Value *Done =
892  CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
893  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
894  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
895 
896  // Done.
897  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
898 }
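// Rough shape of the IR emitted above (illustrative only):
//   entry: isempty = icmp eq dest.begin, dest.end; br isempty, done, body
//   body:  phi for the destination element (and the source element when a
//          UDR is used), emit the initializer/combiner for one element,
//          advance the pointers by one element,
//          done? = icmp eq dest.next, dest.end; br done?, done, body
//   done:  continue with the rest of the function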
899 
900 static llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy>
901 isDeclareTargetDeclaration(const ValueDecl *VD) {
902  for (const Decl *D : VD->redecls()) {
903  if (!D->hasAttrs())
904  continue;
905  if (const auto *Attr = D->getAttr<OMPDeclareTargetDeclAttr>())
906  return Attr->getMapType();
907  }
908  if (const auto *V = dyn_cast<VarDecl>(VD)) {
909  if (const VarDecl *TD = V->getTemplateInstantiationPattern())
910  return isDeclareTargetDeclaration(TD);
911  } else if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
912  if (const auto *TD = FD->getTemplateInstantiationPattern())
913  return isDeclareTargetDeclaration(TD);
914  }
915 
916  return llvm::None;
917 }
918 
919 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
920  return CGF.EmitOMPSharedLValue(E);
921 }
922 
923 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
924  const Expr *E) {
925  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
926  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
927  return LValue();
928 }
929 
930 void ReductionCodeGen::emitAggregateInitialization(
931  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
932  const OMPDeclareReductionDecl *DRD) {
933  // Emit VarDecl with copy init for arrays.
934  // Get the address of the original variable captured in current
935  // captured region.
936  const auto *PrivateVD =
937  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
938  bool EmitDeclareReductionInit =
939  DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
940  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
941  EmitDeclareReductionInit,
942  EmitDeclareReductionInit ? ClausesData[N].ReductionOp
943  : PrivateVD->getInit(),
944  DRD, SharedLVal.getAddress());
945 }
946 
947 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
948  ArrayRef<const Expr *> Privates,
949  ArrayRef<const Expr *> ReductionOps) {
950  ClausesData.reserve(Shareds.size());
951  SharedAddresses.reserve(Shareds.size());
952  Sizes.reserve(Shareds.size());
953  BaseDecls.reserve(Shareds.size());
954  auto IPriv = Privates.begin();
955  auto IRed = ReductionOps.begin();
956  for (const Expr *Ref : Shareds) {
957  ClausesData.emplace_back(Ref, *IPriv, *IRed);
958  std::advance(IPriv, 1);
959  std::advance(IRed, 1);
960  }
961 }
962 
963 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
964  assert(SharedAddresses.size() == N &&
965  "Number of generated lvalues must be exactly N.");
966  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
967  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
968  SharedAddresses.emplace_back(First, Second);
969 }
970 
971 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
972  const auto *PrivateVD =
973  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
974  QualType PrivateType = PrivateVD->getType();
975  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
976  if (!PrivateType->isVariablyModifiedType()) {
977  Sizes.emplace_back(
978  CGF.getTypeSize(
979  SharedAddresses[N].first.getType().getNonReferenceType()),
980  nullptr);
981  return;
982  }
983  llvm::Value *Size;
984  llvm::Value *SizeInChars;
985  auto *ElemType =
986  cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
987  ->getElementType();
988  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
989  if (AsArraySection) {
990  Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
991  SharedAddresses[N].first.getPointer());
992  Size = CGF.Builder.CreateNUWAdd(
993  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
994  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
995  } else {
996  SizeInChars = CGF.getTypeSize(
997  SharedAddresses[N].first.getType().getNonReferenceType());
998  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
999  }
1000  Sizes.emplace_back(SizeInChars, Size);
1001  CodeGenFunction::OpaqueValueMapping OpaqueMap(
1002  CGF,
1003  cast<OpaqueValueExpr>(
1004  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1005  RValue::get(Size));
1006  CGF.EmitVariablyModifiedType(PrivateType);
1007 }
1008 
1009 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
1010  llvm::Value *Size) {
1011  const auto *PrivateVD =
1012  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1013  QualType PrivateType = PrivateVD->getType();
1014  if (!PrivateType->isVariablyModifiedType()) {
1015  assert(!Size && !Sizes[N].second &&
1016  "Size should be nullptr for non-variably modified reduction "
1017  "items.");
1018  return;
1019  }
1020  CodeGenFunction::OpaqueValueMapping OpaqueMap(
1021  CGF,
1022  cast<OpaqueValueExpr>(
1023  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1024  RValue::get(Size));
1025  CGF.EmitVariablyModifiedType(PrivateType);
1026 }
1027 
1028 void ReductionCodeGen::emitInitialization(
1029  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1030  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1031  assert(SharedAddresses.size() > N && "No variable was generated");
1032  const auto *PrivateVD =
1033  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1034  const OMPDeclareReductionDecl *DRD =
1035  getReductionInit(ClausesData[N].ReductionOp);
1036  QualType PrivateType = PrivateVD->getType();
1037  PrivateAddr = CGF.Builder.CreateElementBitCast(
1038  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1039  QualType SharedType = SharedAddresses[N].first.getType();
1040  SharedLVal = CGF.MakeAddrLValue(
1041  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1042  CGF.ConvertTypeForMem(SharedType)),
1043  SharedType, SharedAddresses[N].first.getBaseInfo(),
1044  CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1045  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1046  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1047  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1048  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1049  PrivateAddr, SharedLVal.getAddress(),
1050  SharedLVal.getType());
1051  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1052  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1053  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1054  PrivateVD->getType().getQualifiers(),
1055  /*IsInitializer=*/false);
1056  }
1057 }
1058 
1059 bool ReductionCodeGen::needCleanups(unsigned N) {
1060  const auto *PrivateVD =
1061  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1062  QualType PrivateType = PrivateVD->getType();
1063  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1064  return DTorKind != QualType::DK_none;
1065 }
1066 
1067 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1068  Address PrivateAddr) {
1069  const auto *PrivateVD =
1070  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1071  QualType PrivateType = PrivateVD->getType();
1072  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1073  if (needCleanups(N)) {
1074  PrivateAddr = CGF.Builder.CreateElementBitCast(
1075  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1076  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1077  }
1078 }
1079 
1080 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1081  LValue BaseLV) {
1082  BaseTy = BaseTy.getNonReferenceType();
1083  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1084  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1085  if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1086  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1087  } else {
1088  LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1089  BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1090  }
1091  BaseTy = BaseTy->getPointeeType();
1092  }
1093  return CGF.MakeAddrLValue(
1094  CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1095  CGF.ConvertTypeForMem(ElTy)),
1096  BaseLV.getType(), BaseLV.getBaseInfo(),
1097  CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1098 }
1099 
1100 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1101  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1102  llvm::Value *Addr) {
1103  Address Tmp = Address::invalid();
1104  Address TopTmp = Address::invalid();
1105  Address MostTopTmp = Address::invalid();
1106  BaseTy = BaseTy.getNonReferenceType();
1107  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1108  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1109  Tmp = CGF.CreateMemTemp(BaseTy);
1110  if (TopTmp.isValid())
1111  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1112  else
1113  MostTopTmp = Tmp;
1114  TopTmp = Tmp;
1115  BaseTy = BaseTy->getPointeeType();
1116  }
1117  llvm::Type *Ty = BaseLVType;
1118  if (Tmp.isValid())
1119  Ty = Tmp.getElementType();
1120  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1121  if (Tmp.isValid()) {
1122  CGF.Builder.CreateStore(Addr, Tmp);
1123  return MostTopTmp;
1124  }
1125  return Address(Addr, BaseLVAlignment);
1126 }
1127 
1128 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1129  const VarDecl *OrigVD = nullptr;
1130  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1131  const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1132  while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1133  Base = TempOASE->getBase()->IgnoreParenImpCasts();
1134  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1135  Base = TempASE->getBase()->IgnoreParenImpCasts();
1136  DE = cast<DeclRefExpr>(Base);
1137  OrigVD = cast<VarDecl>(DE->getDecl());
1138  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1139  const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1140  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1141  Base = TempASE->getBase()->IgnoreParenImpCasts();
1142  DE = cast<DeclRefExpr>(Base);
1143  OrigVD = cast<VarDecl>(DE->getDecl());
1144  }
1145  return OrigVD;
1146 }
1147 
1148 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1149  Address PrivateAddr) {
1150  const DeclRefExpr *DE;
1151  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1152  BaseDecls.emplace_back(OrigVD);
1153  LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1154  LValue BaseLValue =
1155  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1156  OriginalBaseLValue);
1157  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1158  BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1159  llvm::Value *PrivatePointer =
1160  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1161  PrivateAddr.getPointer(),
1162  SharedAddresses[N].first.getAddress().getType());
1163  llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1164  return castToBase(CGF, OrigVD->getType(),
1165  SharedAddresses[N].first.getType(),
1166  OriginalBaseLValue.getAddress().getType(),
1167  OriginalBaseLValue.getAlignment(), Ptr);
1168  }
1169  BaseDecls.emplace_back(
1170  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1171  return PrivateAddr;
1172 }
1173 
1174 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1175  const OMPDeclareReductionDecl *DRD =
1176  getReductionInit(ClausesData[N].ReductionOp);
1177  return DRD && DRD->getInitializer();
1178 }
1179 
1180 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1181  return CGF.EmitLoadOfPointerLValue(
1182  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1183  getThreadIDVariable()->getType()->castAs<PointerType>());
1184 }
1185 
1186 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1187  if (!CGF.HaveInsertPoint())
1188  return;
1189  // 1.2.2 OpenMP Language Terminology
1190  // Structured block - An executable statement with a single entry at the
1191  // top and a single exit at the bottom.
1192  // The point of exit cannot be a branch out of the structured block.
1193  // longjmp() and throw() must not violate the entry/exit criteria.
1194  CGF.EHStack.pushTerminate();
1195  CodeGen(CGF);
1196  CGF.EHStack.popTerminate();
1197 }
1198 
1199 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1200  CodeGenFunction &CGF) {
1201  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1202  getThreadIDVariable()->getType(),
1203  AlignmentSource::Decl);
1204 }
1205 
1206 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1207  QualType FieldTy) {
1208  auto *Field = FieldDecl::Create(
1209  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1210  C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1211  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1212  Field->setAccess(AS_public);
1213  DC->addDecl(Field);
1214  return Field;
1215 }
1216 
1217 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1218  StringRef Separator)
1219  : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1220  OffloadEntriesInfoManager(CGM) {
1221  ASTContext &C = CGM.getContext();
1222  RecordDecl *RD = C.buildImplicitRecord("ident_t");
1223  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1224  RD->startDefinition();
1225  // reserved_1
1226  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1227  // flags
1228  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1229  // reserved_2
1230  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1231  // reserved_3
1232  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1233  // psource
1234  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1235  RD->completeDefinition();
1236  IdentQTy = C.getRecordType(RD);
1237  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1238  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1239 
1240  loadOffloadInfoMetadata();
1241 }
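// For reference, the ident_t record built in the constructor above has the
// layout
//   struct ident_t {
//     kmp_int32 reserved_1, flags, reserved_2, reserved_3;
//     const char *psource; // modeled with C.VoidPtrTy here
//   };
// matching the runtime structure documented before the IdentFieldIndex enum
// earlier in this file.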
1242 
1243 void CGOpenMPRuntime::clear() {
1244  InternalVars.clear();
1245 }
1246 
1247 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1248  SmallString<128> Buffer;
1249  llvm::raw_svector_ostream OS(Buffer);
1250  StringRef Sep = FirstSeparator;
1251  for (StringRef Part : Parts) {
1252  OS << Sep << Part;
1253  Sep = Separator;
1254  }
1255  return OS.str();
1256 }
1257 
1258 static llvm::Function *
1259 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1260  const Expr *CombinerInitializer, const VarDecl *In,
1261  const VarDecl *Out, bool IsCombiner) {
1262  // void .omp_combiner.(Ty *in, Ty *out);
1263  ASTContext &C = CGM.getContext();
1264  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1265  FunctionArgList Args;
1266  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1267  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1268  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1269  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1270  Args.push_back(&OmpOutParm);
1271  Args.push_back(&OmpInParm);
1272  const CGFunctionInfo &FnInfo =
1273  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1274  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1275  std::string Name = CGM.getOpenMPRuntime().getName(
1276  {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1277  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1278  Name, &CGM.getModule());
1279  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1280  Fn->removeFnAttr(llvm::Attribute::NoInline);
1281  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1282  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1283  CodeGenFunction CGF(CGM);
1284  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1285  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1286  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1287  Out->getLocation());
1288  CodeGenFunction::OMPPrivateScope Scope(CGF);
1289  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1290  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1291  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1292  .getAddress();
1293  });
1294  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1295  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1296  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1297  .getAddress();
1298  });
1299  (void)Scope.Privatize();
1300  if (!IsCombiner && Out->hasInit() &&
1301  !CGF.isTrivialInitializer(Out->getInit())) {
1302  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1303  Out->getType().getQualifiers(),
1304  /*IsInitializer=*/true);
1305  }
1306  if (CombinerInitializer)
1307  CGF.EmitIgnoredExpr(CombinerInitializer);
1308  Scope.ForceCleanup();
1309  CGF.FinishFunction();
1310  return Fn;
1311 }
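// Illustrative mapping (not part of this file): for a user-defined reduction
// such as
//   #pragma omp declare reduction(mymin : int : \
//       omp_out = omp_in < omp_out ? omp_in : omp_out) \
//       initializer(omp_priv = 2147483647)
// the helpers produced above have roughly the shape
//   void .omp_combiner.(int *omp_out, int *omp_in);       // runs the combiner
//   void .omp_initializer.(int *omp_priv, int *omp_orig); // runs the initializer
// with omp_in/omp_out (and omp_priv/omp_orig) privatized to the pointer
// parameters, as done in the body of emitCombinerOrInitializer.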
1312 
1313 void CGOpenMPRuntime::emitUserDefinedReduction(
1314  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1315  if (UDRMap.count(D) > 0)
1316  return;
1317  ASTContext &C = CGM.getContext();
1318  if (!In || !Out) {
1319  In = &C.Idents.get("omp_in");
1320  Out = &C.Idents.get("omp_out");
1321  }
1322  llvm::Function *Combiner = emitCombinerOrInitializer(
1323  CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
1324  cast<VarDecl>(D->lookup(Out).front()),
1325  /*IsCombiner=*/true);
1326  llvm::Function *Initializer = nullptr;
1327  if (const Expr *Init = D->getInitializer()) {
1328  if (!Priv || !Orig) {
1329  Priv = &C.Idents.get("omp_priv");
1330  Orig = &C.Idents.get("omp_orig");
1331  }
1332  Initializer = emitCombinerOrInitializer(
1333  CGM, D->getType(),
1334  D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1335  : nullptr,
1336  cast<VarDecl>(D->lookup(Orig).front()),
1337  cast<VarDecl>(D->lookup(Priv).front()),
1338  /*IsCombiner=*/false);
1339  }
1340  UDRMap.try_emplace(D, Combiner, Initializer);
1341  if (CGF) {
1342  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1343  Decls.second.push_back(D);
1344  }
1345 }
1346 
1347 std::pair<llvm::Function *, llvm::Function *>
1348 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1349  auto I = UDRMap.find(D);
1350  if (I != UDRMap.end())
1351  return I->second;
1352  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1353  return UDRMap.lookup(D);
1354 }
1355 
1356 static llvm::Value *emitParallelOrTeamsOutlinedFunction(
1357  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1358  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1359  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1360  assert(ThreadIDVar->getType()->isPointerType() &&
1361  "thread id variable must be of type kmp_int32 *");
1362  CodeGenFunction CGF(CGM, true);
1363  bool HasCancel = false;
1364  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1365  HasCancel = OPD->hasCancel();
1366  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1367  HasCancel = OPSD->hasCancel();
1368  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1369  HasCancel = OPFD->hasCancel();
1370  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1371  HasCancel = OPFD->hasCancel();
1372  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1373  HasCancel = OPFD->hasCancel();
1374  else if (const auto *OPFD =
1375  dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1376  HasCancel = OPFD->hasCancel();
1377  else if (const auto *OPFD =
1378  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1379  HasCancel = OPFD->hasCancel();
1380  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1381  HasCancel, OutlinedHelperName);
1382  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1383  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1384 }
1385 
1386 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
1387  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1388  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1389  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1390  return emitParallelOrTeamsOutlinedFunction(
1391  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1392 }
1393 
1394 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1395  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1396  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1397  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1398  return emitParallelOrTeamsOutlinedFunction(
1399  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1400 }
1401 
1402 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
1403  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1404  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1405  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1406  bool Tied, unsigned &NumberOfParts) {
1407  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1408  PrePostActionTy &) {
1409  llvm::Value *ThreadID = getThreadID(CGF, D.getLocStart());
1410  llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
1411  llvm::Value *TaskArgs[] = {
1412  UpLoc, ThreadID,
1413  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1414  TaskTVar->getType()->castAs<PointerType>())
1415  .getPointer()};
1416  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1417  };
1418  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1419  UntiedCodeGen);
1420  CodeGen.setAction(Action);
1421  assert(!ThreadIDVar->getType()->isPointerType() &&
1422  "thread id variable must be of type kmp_int32 for tasks");
1423  const OpenMPDirectiveKind Region =
1424  isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1425  : OMPD_task;
1426  const CapturedStmt *CS = D.getCapturedStmt(Region);
1427  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1428  CodeGenFunction CGF(CGM, true);
1429  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1430  InnermostKind,
1431  TD ? TD->hasCancel() : false, Action);
1432  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1433  llvm::Value *Res = CGF.GenerateCapturedStmtFunction(*CS);
1434  if (!Tied)
1435  NumberOfParts = Action.getNumberOfParts();
1436  return Res;
1437 }
1438 
1439 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1440  const RecordDecl *RD, const CGRecordLayout &RL,
1441  ArrayRef<llvm::Constant *> Data) {
1442  llvm::StructType *StructTy = RL.getLLVMType();
1443  unsigned PrevIdx = 0;
1444  ConstantInitBuilder CIBuilder(CGM);
1445  auto DI = Data.begin();
1446  for (const FieldDecl *FD : RD->fields()) {
1447  unsigned Idx = RL.getLLVMFieldNo(FD);
1448  // Fill the alignment.
1449  for (unsigned I = PrevIdx; I < Idx; ++I)
1450  Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1451  PrevIdx = Idx + 1;
1452  Fields.add(*DI);
1453  ++DI;
1454  }
1455 }
1456 
1457 template <class... As>
1458 static llvm::GlobalVariable *
1459 createConstantGlobalStruct(CodeGenModule &CGM, QualType Ty,
1460  ArrayRef<llvm::Constant *> Data, const Twine &Name,
1461  As &&... Args) {
1462  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1463  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1464  ConstantInitBuilder CIBuilder(CGM);
1465  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1466  buildStructValue(Fields, CGM, RD, RL, Data);
1467  return Fields.finishAndCreateGlobal(
1468  Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty),
1469  /*isConstant=*/true, std::forward<As>(Args)...);
1470 }
1471 
1472 template <typename T>
1473 static void
1474 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1475  ArrayRef<llvm::Constant *> Data,
1476  T &Parent) {
1477  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1478  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1479  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1480  buildStructValue(Fields, CGM, RD, RL, Data);
1481  Fields.finishAndAddTo(Parent);
1482 }
1483 
1484 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1485  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1486  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
1487  if (!Entry) {
1488  if (!DefaultOpenMPPSource) {
1489  // Initialize default location for psource field of ident_t structure of
1490  // all ident_t objects. Format is ";file;function;line;column;;".
1491  // Taken from
1492  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
1493  DefaultOpenMPPSource =
1494  CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1495  DefaultOpenMPPSource =
1496  llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1497  }
1498 
1499  llvm::Constant *Data[] = {llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1500  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1501  llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1502  llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1503  DefaultOpenMPPSource};
1504  llvm::GlobalValue *DefaultOpenMPLocation = createConstantGlobalStruct(
1505  CGM, IdentQTy, Data, "", llvm::GlobalValue::PrivateLinkage);
1506  DefaultOpenMPLocation->setUnnamedAddr(
1507  llvm::GlobalValue::UnnamedAddr::Global);
1508 
1509  OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
1510  }
1511  return Address(Entry, Align);
1512 }
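// For reference, the default location built here is, roughly, an ident_t
// constant of the form { 0, Flags, 0, 0, ";unknown;unknown;0;0;;" }, emitted
// once per Flags value as a private unnamed_addr global and cached in
// OpenMPDefaultLocMap.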
1513 
1514 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1515                                                  SourceLocation Loc,
1516  unsigned Flags) {
1517  Flags |= OMP_IDENT_KMPC;
1518  // If no debug info is generated - return global default location.
1519  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1520  Loc.isInvalid())
1521  return getOrCreateDefaultLocation(Flags).getPointer();
1522 
1523  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1524 
1525  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1526  Address LocValue = Address::invalid();
1527  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1528  if (I != OpenMPLocThreadIDMap.end())
1529  LocValue = Address(I->second.DebugLoc, Align);
1530 
1531  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1532  // GetOpenMPThreadID was called before this routine.
1533  if (!LocValue.isValid()) {
1534  // Generate "ident_t .kmpc_loc.addr;"
1535  Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1536  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1537  Elem.second.DebugLoc = AI.getPointer();
1538  LocValue = AI;
1539 
1540  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1541  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1542  CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1543  CGF.getTypeSize(IdentQTy));
1544  }
1545 
1546  // char **psource = &.kmpc_loc_<flags>.addr.psource;
1547  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1548  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1549  LValue PSource =
1550  CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1551 
1552  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1553  if (OMPDebugLoc == nullptr) {
1554  SmallString<128> Buffer2;
1555  llvm::raw_svector_ostream OS2(Buffer2);
1556  // Build debug location
1557  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1558  OS2 << ";" << PLoc.getFilename() << ";";
1559  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1560  OS2 << FD->getQualifiedNameAsString();
1561  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1562  OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1563  OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1564  }
1565  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1566  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1567 
1568  // Our callers always pass this to a runtime function, so for
1569  // convenience, go ahead and return a naked pointer.
1570  return LocValue.getPointer();
1571 }
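// In short: emitUpdateLocation materializes a per-function ident_t temporary
// (".kmpc_loc.addr") that starts as a copy of the default location and whose
// psource field is rewritten with a ";<file>;<function>;<line>;<column>;;"
// string for the current directive; the raw pointer to that temporary is what
// gets passed to the runtime entry points.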
1572 
1573 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1574                                           SourceLocation Loc) {
1575  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1576 
1577  llvm::Value *ThreadID = nullptr;
1578  // Check whether we've already cached a load of the thread id in this
1579  // function.
1580  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1581  if (I != OpenMPLocThreadIDMap.end()) {
1582  ThreadID = I->second.ThreadID;
1583  if (ThreadID != nullptr)
1584  return ThreadID;
1585  }
1586  // If exceptions are enabled, do not use parameter to avoid possible crash.
1587  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1588  !CGF.getLangOpts().CXXExceptions ||
1589  CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1590  if (auto *OMPRegionInfo =
1591  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1592  if (OMPRegionInfo->getThreadIDVariable()) {
1593  // Check if this is an outlined function with the thread id passed as an argument.
1594  LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1595  ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1596  // If the value was loaded in the entry block, cache it and use it
1597  // everywhere in the function.
1598  if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1599  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1600  Elem.second.ThreadID = ThreadID;
1601  }
1602  return ThreadID;
1603  }
1604  }
1605  }
1606 
1607  // This is not an outlined function region - need to call kmp_int32
1608  // __kmpc_global_thread_num(ident_t *loc).
1609  // Generate thread id value and cache this value for use across the
1610  // function.
1611  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1612  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1613  llvm::CallInst *Call = CGF.Builder.CreateCall(
1614  createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1615  emitUpdateLocation(CGF, Loc));
1616  Call->setCallingConv(CGF.getRuntimeCC());
1617  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1618  Elem.second.ThreadID = Call;
1619  return Call;
1620 }
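// Summary: the thread id comes from the outlined region's gtid parameter when
// one is available; otherwise a single call, roughly
//   %gtid = call i32 @__kmpc_global_thread_num(%ident_t* %loc)
// is emitted at the alloca insertion point. Either way the value is cached in
// OpenMPLocThreadIDMap, keyed by the current function.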
1621 
1622 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1623  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1624  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1625  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1626  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1627  for(auto *D : FunctionUDRMap[CGF.CurFn])
1628  UDRMap.erase(D);
1629  FunctionUDRMap.erase(CGF.CurFn);
1630  }
1631 }
1632 
1633 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1634  return IdentTy->getPointerTo();
1635 }
1636 
1637 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1638  if (!Kmpc_MicroTy) {
1639  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1640  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1641  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1642  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1643  }
1644  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1645 }
1646 
1647 llvm::Constant *
1648 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1649  llvm::Constant *RTLFn = nullptr;
1650  switch (static_cast<OpenMPRTLFunction>(Function)) {
1651  case OMPRTL__kmpc_fork_call: {
1652  // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1653  // microtask, ...);
1654  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1655  getKmpc_MicroPointerTy()};
1656  auto *FnTy =
1657  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1658  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1659  break;
1660  }
1661  case OMPRTL__kmpc_global_thread_num: {
1662  // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1663  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1664  auto *FnTy =
1665  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1666  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1667  break;
1668  }
1669  case OMPRTL__kmpc_threadprivate_cached: {
1670  // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1671  // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1672  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1673  CGM.VoidPtrTy, CGM.SizeTy,
1674  CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1675  auto *FnTy =
1676  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1677  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1678  break;
1679  }
1680  case OMPRTL__kmpc_critical: {
1681  // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1682  // kmp_critical_name *crit);
1683  llvm::Type *TypeParams[] = {
1684  getIdentTyPointerTy(), CGM.Int32Ty,
1685  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1686  auto *FnTy =
1687  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1688  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1689  break;
1690  }
1691  case OMPRTL__kmpc_critical_with_hint: {
1692  // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1693  // kmp_critical_name *crit, uintptr_t hint);
1694  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1695  llvm::PointerType::getUnqual(KmpCriticalNameTy),
1696  CGM.IntPtrTy};
1697  auto *FnTy =
1698  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1699  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1700  break;
1701  }
1702  case OMPRTL__kmpc_threadprivate_register: {
1703  // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1704  // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1705  // typedef void *(*kmpc_ctor)(void *);
1706  auto *KmpcCtorTy =
1707  llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1708  /*isVarArg*/ false)->getPointerTo();
1709  // typedef void *(*kmpc_cctor)(void *, void *);
1710  llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1711  auto *KmpcCopyCtorTy =
1712  llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1713  /*isVarArg*/ false)
1714  ->getPointerTo();
1715  // typedef void (*kmpc_dtor)(void *);
1716  auto *KmpcDtorTy =
1717  llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1718  ->getPointerTo();
1719  llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1720  KmpcCopyCtorTy, KmpcDtorTy};
1721  auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1722  /*isVarArg*/ false);
1723  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1724  break;
1725  }
1726  case OMPRTL__kmpc_end_critical: {
1727  // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1728  // kmp_critical_name *crit);
1729  llvm::Type *TypeParams[] = {
1730  getIdentTyPointerTy(), CGM.Int32Ty,
1731  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1732  auto *FnTy =
1733  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1734  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1735  break;
1736  }
1737  case OMPRTL__kmpc_cancel_barrier: {
1738  // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1739  // global_tid);
1740  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1741  auto *FnTy =
1742  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1743  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1744  break;
1745  }
1746  case OMPRTL__kmpc_barrier: {
1747  // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1748  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1749  auto *FnTy =
1750  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1751  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1752  break;
1753  }
1754  case OMPRTL__kmpc_for_static_fini: {
1755  // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1756  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1757  auto *FnTy =
1758  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1759  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1760  break;
1761  }
1762  case OMPRTL__kmpc_push_num_threads: {
1763  // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1764  // kmp_int32 num_threads)
1765  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1766  CGM.Int32Ty};
1767  auto *FnTy =
1768  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1769  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1770  break;
1771  }
1772  case OMPRTL__kmpc_serialized_parallel: {
1773  // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1774  // global_tid);
1775  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1776  auto *FnTy =
1777  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1778  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1779  break;
1780  }
1781  case OMPRTL__kmpc_end_serialized_parallel: {
1782  // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1783  // global_tid);
1784  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1785  auto *FnTy =
1786  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1787  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1788  break;
1789  }
1790  case OMPRTL__kmpc_flush: {
1791  // Build void __kmpc_flush(ident_t *loc);
1792  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1793  auto *FnTy =
1794  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1795  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1796  break;
1797  }
1798  case OMPRTL__kmpc_master: {
1799  // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1800  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1801  auto *FnTy =
1802  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1803  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1804  break;
1805  }
1806  case OMPRTL__kmpc_end_master: {
1807  // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1808  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1809  auto *FnTy =
1810  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1811  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1812  break;
1813  }
1814  case OMPRTL__kmpc_omp_taskyield: {
1815  // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1816  // int end_part);
1817  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1818  auto *FnTy =
1819  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1820  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1821  break;
1822  }
1823  case OMPRTL__kmpc_single: {
1824  // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1825  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1826  auto *FnTy =
1827  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1828  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1829  break;
1830  }
1831  case OMPRTL__kmpc_end_single: {
1832  // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1833  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1834  auto *FnTy =
1835  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1836  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1837  break;
1838  }
1839  case OMPRTL__kmpc_omp_task_alloc: {
1840  // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1841  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1842  // kmp_routine_entry_t *task_entry);
1843  assert(KmpRoutineEntryPtrTy != nullptr &&
1844  "Type kmp_routine_entry_t must be created.");
1845  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1846  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1847  // Return void * and then cast to particular kmp_task_t type.
1848  auto *FnTy =
1849  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1850  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1851  break;
1852  }
1853  case OMPRTL__kmpc_omp_task: {
1854  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1855  // *new_task);
1856  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1857  CGM.VoidPtrTy};
1858  auto *FnTy =
1859  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1860  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1861  break;
1862  }
1863  case OMPRTL__kmpc_copyprivate: {
1864  // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1865  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1866  // kmp_int32 didit);
1867  llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1868  auto *CpyFnTy =
1869  llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1870  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1871  CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1872  CGM.Int32Ty};
1873  auto *FnTy =
1874  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1875  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1876  break;
1877  }
1878  case OMPRTL__kmpc_reduce: {
1879  // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1880  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1881  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1882  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1883  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1884  /*isVarArg=*/false);
1885  llvm::Type *TypeParams[] = {
1886  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1887  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1888  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1889  auto *FnTy =
1890  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1891  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1892  break;
1893  }
1894  case OMPRTL__kmpc_reduce_nowait: {
1895  // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1896  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1897  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1898  // *lck);
1899  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1900  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1901  /*isVarArg=*/false);
1902  llvm::Type *TypeParams[] = {
1903  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1904  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1905  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1906  auto *FnTy =
1907  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1908  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1909  break;
1910  }
1911  case OMPRTL__kmpc_end_reduce: {
1912  // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1913  // kmp_critical_name *lck);
1914  llvm::Type *TypeParams[] = {
1915  getIdentTyPointerTy(), CGM.Int32Ty,
1916  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1917  auto *FnTy =
1918  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1919  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1920  break;
1921  }
1922  case OMPRTL__kmpc_end_reduce_nowait: {
1923  // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1924  // kmp_critical_name *lck);
1925  llvm::Type *TypeParams[] = {
1926  getIdentTyPointerTy(), CGM.Int32Ty,
1927  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1928  auto *FnTy =
1929  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1930  RTLFn =
1931  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1932  break;
1933  }
1934  case OMPRTL__kmpc_omp_task_begin_if0: {
1935  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, kmp_task_t
1936  // *new_task);
1937  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1938  CGM.VoidPtrTy};
1939  auto *FnTy =
1940  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1941  RTLFn =
1942  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1943  break;
1944  }
1945  case OMPRTL__kmpc_omp_task_complete_if0: {
1946  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
1947  // kmp_task_t *new_task);
1948  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1949  CGM.VoidPtrTy};
1950  auto *FnTy =
1951  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1952  RTLFn = CGM.CreateRuntimeFunction(FnTy,
1953  /*Name=*/"__kmpc_omp_task_complete_if0");
1954  break;
1955  }
1956  case OMPRTL__kmpc_ordered: {
1957  // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1958  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1959  auto *FnTy =
1960  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1961  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1962  break;
1963  }
1964  case OMPRTL__kmpc_end_ordered: {
1965  // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1966  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1967  auto *FnTy =
1968  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1969  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1970  break;
1971  }
1972  case OMPRTL__kmpc_omp_taskwait: {
1973  // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1974  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1975  auto *FnTy =
1976  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1977  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1978  break;
1979  }
1980  case OMPRTL__kmpc_taskgroup: {
1981  // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1982  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1983  auto *FnTy =
1984  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1985  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1986  break;
1987  }
1988  case OMPRTL__kmpc_end_taskgroup: {
1989  // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1990  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1991  auto *FnTy =
1992  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1993  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1994  break;
1995  }
1996  case OMPRTL__kmpc_push_proc_bind: {
1997  // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1998  // int proc_bind)
1999  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2000  auto *FnTy =
2001  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2002  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2003  break;
2004  }
2005  case OMPRTL__kmpc_omp_task_with_deps: {
2006  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2007  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2008  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2009  llvm::Type *TypeParams[] = {
2010  getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
2011  CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
2012  auto *FnTy =
2013  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2014  RTLFn =
2015  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2016  break;
2017  }
2018  case OMPRTL__kmpc_omp_wait_deps: {
2019  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2020  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2021  // kmp_depend_info_t *noalias_dep_list);
2022  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2023  CGM.Int32Ty, CGM.VoidPtrTy,
2024  CGM.Int32Ty, CGM.VoidPtrTy};
2025  auto *FnTy =
2026  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2027  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2028  break;
2029  }
2030  case OMPRTL__kmpc_cancellationpoint: {
2031  // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2032  // global_tid, kmp_int32 cncl_kind)
2033  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2034  auto *FnTy =
2035  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2036  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2037  break;
2038  }
2039  case OMPRTL__kmpc_cancel: {
2040  // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2041  // kmp_int32 cncl_kind)
2042  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2043  auto *FnTy =
2044  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2045  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2046  break;
2047  }
2048  case OMPRTL__kmpc_push_num_teams: {
2049  // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2050  // kmp_int32 num_teams, kmp_int32 num_threads)
2051  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2052  CGM.Int32Ty};
2053  auto *FnTy =
2054  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2055  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2056  break;
2057  }
2058  case OMPRTL__kmpc_fork_teams: {
2059  // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2060  // microtask, ...);
2061  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2062  getKmpc_MicroPointerTy()};
2063  auto *FnTy =
2064  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2065  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2066  break;
2067  }
2068  case OMPRTL__kmpc_taskloop: {
2069  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2070  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2071  // sched, kmp_uint64 grainsize, void *task_dup);
2072  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2073  CGM.IntTy,
2074  CGM.VoidPtrTy,
2075  CGM.IntTy,
2076  CGM.Int64Ty->getPointerTo(),
2077  CGM.Int64Ty->getPointerTo(),
2078  CGM.Int64Ty,
2079  CGM.IntTy,
2080  CGM.IntTy,
2081  CGM.Int64Ty,
2082  CGM.VoidPtrTy};
2083  auto *FnTy =
2084  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2085  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2086  break;
2087  }
2088  case OMPRTL__kmpc_doacross_init: {
2089  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2090  // num_dims, struct kmp_dim *dims);
2091  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2092  CGM.Int32Ty,
2093  CGM.Int32Ty,
2094  CGM.VoidPtrTy};
2095  auto *FnTy =
2096  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2097  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2098  break;
2099  }
2100  case OMPRTL__kmpc_doacross_fini: {
2101  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2102  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2103  auto *FnTy =
2104  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2105  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2106  break;
2107  }
2108  case OMPRTL__kmpc_doacross_post: {
2109  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2110  // *vec);
2111  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2112  CGM.Int64Ty->getPointerTo()};
2113  auto *FnTy =
2114  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2115  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2116  break;
2117  }
2118  case OMPRTL__kmpc_doacross_wait: {
2119  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2120  // *vec);
2121  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2122  CGM.Int64Ty->getPointerTo()};
2123  auto *FnTy =
2124  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2125  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2126  break;
2127  }
2128  case OMPRTL__kmpc_task_reduction_init: {
2129  // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2130  // *data);
2131  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2132  auto *FnTy =
2133  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2134  RTLFn =
2135  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2136  break;
2137  }
2138  case OMPRTL__kmpc_task_reduction_get_th_data: {
2139  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2140  // *d);
2141  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2142  auto *FnTy =
2143  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2144  RTLFn = CGM.CreateRuntimeFunction(
2145  FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2146  break;
2147  }
2148  case OMPRTL__tgt_target: {
2149  // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2150  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2151  // *arg_types);
2152  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2153  CGM.VoidPtrTy,
2154  CGM.Int32Ty,
2155  CGM.VoidPtrPtrTy,
2156  CGM.VoidPtrPtrTy,
2157  CGM.SizeTy->getPointerTo(),
2158  CGM.Int64Ty->getPointerTo()};
2159  auto *FnTy =
2160  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2161  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2162  break;
2163  }
2164  case OMPRTL__tgt_target_nowait: {
2165  // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2166  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2167  // int64_t *arg_types);
2168  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2169  CGM.VoidPtrTy,
2170  CGM.Int32Ty,
2171  CGM.VoidPtrPtrTy,
2172  CGM.VoidPtrPtrTy,
2173  CGM.SizeTy->getPointerTo(),
2174  CGM.Int64Ty->getPointerTo()};
2175  auto *FnTy =
2176  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2177  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2178  break;
2179  }
2180  case OMPRTL__tgt_target_teams: {
2181  // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2182  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2183  // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2184  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2185  CGM.VoidPtrTy,
2186  CGM.Int32Ty,
2187  CGM.VoidPtrPtrTy,
2188  CGM.VoidPtrPtrTy,
2189  CGM.SizeTy->getPointerTo(),
2190  CGM.Int64Ty->getPointerTo(),
2191  CGM.Int32Ty,
2192  CGM.Int32Ty};
2193  auto *FnTy =
2194  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2195  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2196  break;
2197  }
2198  case OMPRTL__tgt_target_teams_nowait: {
2199  // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2200  // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
2201  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2202  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2203  CGM.VoidPtrTy,
2204  CGM.Int32Ty,
2205  CGM.VoidPtrPtrTy,
2206  CGM.VoidPtrPtrTy,
2207  CGM.SizeTy->getPointerTo(),
2208  CGM.Int64Ty->getPointerTo(),
2209  CGM.Int32Ty,
2210  CGM.Int32Ty};
2211  auto *FnTy =
2212  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2213  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2214  break;
2215  }
2216  case OMPRTL__tgt_register_lib: {
2217  // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2218  QualType ParamTy =
2219  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2220  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2221  auto *FnTy =
2222  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2223  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2224  break;
2225  }
2226  case OMPRTL__tgt_unregister_lib: {
2227  // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2228  QualType ParamTy =
2229  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2230  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2231  auto *FnTy =
2232  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2233  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2234  break;
2235  }
2236  case OMPRTL__tgt_target_data_begin: {
2237  // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2238  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2239  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2240  CGM.Int32Ty,
2241  CGM.VoidPtrPtrTy,
2242  CGM.VoidPtrPtrTy,
2243  CGM.SizeTy->getPointerTo(),
2244  CGM.Int64Ty->getPointerTo()};
2245  auto *FnTy =
2246  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2247  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2248  break;
2249  }
2250  case OMPRTL__tgt_target_data_begin_nowait: {
2251  // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2252  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2253  // *arg_types);
2254  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2255  CGM.Int32Ty,
2256  CGM.VoidPtrPtrTy,
2257  CGM.VoidPtrPtrTy,
2258  CGM.SizeTy->getPointerTo(),
2259  CGM.Int64Ty->getPointerTo()};
2260  auto *FnTy =
2261  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2262  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2263  break;
2264  }
2265  case OMPRTL__tgt_target_data_end: {
2266  // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2267  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2268  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2269  CGM.Int32Ty,
2270  CGM.VoidPtrPtrTy,
2271  CGM.VoidPtrPtrTy,
2272  CGM.SizeTy->getPointerTo(),
2273  CGM.Int64Ty->getPointerTo()};
2274  auto *FnTy =
2275  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2276  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2277  break;
2278  }
2279  case OMPRTL__tgt_target_data_end_nowait: {
2280  // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2281  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2282  // *arg_types);
2283  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2284  CGM.Int32Ty,
2285  CGM.VoidPtrPtrTy,
2286  CGM.VoidPtrPtrTy,
2287  CGM.SizeTy->getPointerTo(),
2288  CGM.Int64Ty->getPointerTo()};
2289  auto *FnTy =
2290  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2291  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2292  break;
2293  }
2294  case OMPRTL__tgt_target_data_update: {
2295  // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2296  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
2297  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2298  CGM.Int32Ty,
2299  CGM.VoidPtrPtrTy,
2300  CGM.VoidPtrPtrTy,
2301  CGM.SizeTy->getPointerTo(),
2302  CGM.Int64Ty->getPointerTo()};
2303  auto *FnTy =
2304  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2305  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2306  break;
2307  }
2308  case OMPRTL__tgt_target_data_update_nowait: {
2309  // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2310  // arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
2311  // *arg_types);
2312  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2313  CGM.Int32Ty,
2314  CGM.VoidPtrPtrTy,
2315  CGM.VoidPtrPtrTy,
2316  CGM.SizeTy->getPointerTo(),
2317  CGM.Int64Ty->getPointerTo()};
2318  auto *FnTy =
2319  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2320  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2321  break;
2322  }
2323  }
2324  assert(RTLFn && "Unable to find OpenMP runtime function");
2325  return RTLFn;
2326 }
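// A typical use of createRuntimeFunction elsewhere in this file pairs it with
// EmitRuntimeCall, e.g. (sketch):
//   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
//   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
// which lazily declares the entry point and emits the call with the runtime
// calling convention.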
2327 
2328 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2329  bool IVSigned) {
2330  assert((IVSize == 32 || IVSize == 64) &&
2331  "IV size is not compatible with the omp runtime");
2332  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2333  : "__kmpc_for_static_init_4u")
2334  : (IVSigned ? "__kmpc_for_static_init_8"
2335  : "__kmpc_for_static_init_8u");
2336  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2337  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2338  llvm::Type *TypeParams[] = {
2339  getIdentTyPointerTy(), // loc
2340  CGM.Int32Ty, // tid
2341  CGM.Int32Ty, // schedtype
2342  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2343  PtrTy, // p_lower
2344  PtrTy, // p_upper
2345  PtrTy, // p_stride
2346  ITy, // incr
2347  ITy // chunk
2348  };
2349  auto *FnTy =
2350  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2351  return CGM.CreateRuntimeFunction(FnTy, Name);
2352 }
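// As a concrete example, for a 32-bit signed induction variable the entry
// point declared above is (sketch):
//   void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 tid,
//                                 kmp_int32 schedtype, kmp_int32 *p_lastiter,
//                                 kmp_int32 *p_lower, kmp_int32 *p_upper,
//                                 kmp_int32 *p_stride, kmp_int32 incr,
//                                 kmp_int32 chunk);
// The _4u/_8/_8u variants differ only in the induction-variable type.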
2353 
2354 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2355  bool IVSigned) {
2356  assert((IVSize == 32 || IVSize == 64) &&
2357  "IV size is not compatible with the omp runtime");
2358  StringRef Name =
2359  IVSize == 32
2360  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2361  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2362  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2363  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2364  CGM.Int32Ty, // tid
2365  CGM.Int32Ty, // schedtype
2366  ITy, // lower
2367  ITy, // upper
2368  ITy, // stride
2369  ITy // chunk
2370  };
2371  auto *FnTy =
2372  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2373  return CGM.CreateRuntimeFunction(FnTy, Name);
2374 }
2375 
2376 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2377  bool IVSigned) {
2378  assert((IVSize == 32 || IVSize == 64) &&
2379  "IV size is not compatible with the omp runtime");
2380  StringRef Name =
2381  IVSize == 32
2382  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2383  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2384  llvm::Type *TypeParams[] = {
2385  getIdentTyPointerTy(), // loc
2386  CGM.Int32Ty, // tid
2387  };
2388  auto *FnTy =
2389  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2390  return CGM.CreateRuntimeFunction(FnTy, Name);
2391 }
2392 
2393 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2394  bool IVSigned) {
2395  assert((IVSize == 32 || IVSize == 64) &&
2396  "IV size is not compatible with the omp runtime");
2397  StringRef Name =
2398  IVSize == 32
2399  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2400  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2401  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2402  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2403  llvm::Type *TypeParams[] = {
2404  getIdentTyPointerTy(), // loc
2405  CGM.Int32Ty, // tid
2406  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2407  PtrTy, // p_lower
2408  PtrTy, // p_upper
2409  PtrTy // p_stride
2410  };
2411  auto *FnTy =
2412  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2413  return CGM.CreateRuntimeFunction(FnTy, Name);
2414 }
2415 
2416 Address CGOpenMPRuntime::getAddrOfDeclareTargetLink(const VarDecl *VD) {
2417  if (CGM.getLangOpts().OpenMPSimd)
2418  return Address::invalid();
2419  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2420  isDeclareTargetDeclaration(VD);
2421  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Link) {
2422  SmallString<64> PtrName;
2423  {
2424  llvm::raw_svector_ostream OS(PtrName);
2425  OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_link_ptr";
2426  }
2427  llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2428  if (!Ptr) {
2429  QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2430  Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
2431  PtrName);
2432  if (!CGM.getLangOpts().OpenMPIsDevice) {
2433  auto *GV = cast<llvm::GlobalVariable>(Ptr);
2434  GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2435  GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2436  }
2437  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2438  registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2439  }
2440  return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2441  }
2442  return Address::invalid();
2443 }
2444 
2445 llvm::Constant *
2446 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2447  assert(!CGM.getLangOpts().OpenMPUseTLS ||
2448  CGM.getContext().getTargetInfo().isTLSSupported());
2449  // Lookup the entry, lazily creating it if necessary.
2450  std::string Suffix = getName({"cache", ""});
2451  return getOrCreateInternalVariable(
2452  CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2453 }
2454 
2455 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2456  const VarDecl *VD,
2457  Address VDAddr,
2458  SourceLocation Loc) {
2459  if (CGM.getLangOpts().OpenMPUseTLS &&
2460  CGM.getContext().getTargetInfo().isTLSSupported())
2461  return VDAddr;
2462 
2463  llvm::Type *VarTy = VDAddr.getElementType();
2464  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2465  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2466  CGM.Int8PtrTy),
2467  CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2468  getOrCreateThreadPrivateCache(VD)};
2469  return Address(CGF.EmitRuntimeCall(
2470  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2471  VDAddr.getAlignment());
2472 }
2473 
2474 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2475  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2476  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2477  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2478  // library.
2479  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2480  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2481  OMPLoc);
2482  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2483  // to register constructor/destructor for variable.
2484  llvm::Value *Args[] = {
2485  OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2486  Ctor, CopyCtor, Dtor};
2487  CGF.EmitRuntimeCall(
2488  createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2489 }
2490 
2491 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2492  const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2493  bool PerformInit, CodeGenFunction *CGF) {
2494  if (CGM.getLangOpts().OpenMPUseTLS &&
2495  CGM.getContext().getTargetInfo().isTLSSupported())
2496  return nullptr;
2497 
2498  VD = VD->getDefinition(CGM.getContext());
2499  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
2500  ThreadPrivateWithDefinition.insert(VD);
2501  QualType ASTTy = VD->getType();
2502 
2503  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2504  const Expr *Init = VD->getAnyInitializer();
2505  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2506  // Generate function that re-emits the declaration's initializer into the
2507  // threadprivate copy of the variable VD
2508  CodeGenFunction CtorCGF(CGM);
2509  FunctionArgList Args;
2510  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2511  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2512  ImplicitParamDecl::Other);
2513  Args.push_back(&Dst);
2514 
2515  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2516  CGM.getContext().VoidPtrTy, Args);
2517  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2518  std::string Name = getName({"__kmpc_global_ctor_", ""});
2519  llvm::Function *Fn =
2520  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2521  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2522  Args, Loc, Loc);
2523  llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2524  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2525  CGM.getContext().VoidPtrTy, Dst.getLocation());
2526  Address Arg = Address(ArgVal, VDAddr.getAlignment());
2527  Arg = CtorCGF.Builder.CreateElementBitCast(
2528  Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2529  CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2530  /*IsInitializer=*/true);
2531  ArgVal = CtorCGF.EmitLoadOfScalar(
2532  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2533  CGM.getContext().VoidPtrTy, Dst.getLocation());
2534  CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2535  CtorCGF.FinishFunction();
2536  Ctor = Fn;
2537  }
2538  if (VD->getType().isDestructedType() != QualType::DK_none) {
2539  // Generate function that emits destructor call for the threadprivate copy
2540  // of the variable VD
2541  CodeGenFunction DtorCGF(CGM);
2542  FunctionArgList Args;
2543  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2544  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2545  ImplicitParamDecl::Other);
2546  Args.push_back(&Dst);
2547 
2548  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2549  CGM.getContext().VoidTy, Args);
2550  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2551  std::string Name = getName({"__kmpc_global_dtor_", ""});
2552  llvm::Function *Fn =
2553  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2554  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2555  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2556  Loc, Loc);
2557  // Create a scope with an artificial location for the body of this function.
2558  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2559  llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2560  DtorCGF.GetAddrOfLocalVar(&Dst),
2561  /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2562  DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2563  DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2564  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2565  DtorCGF.FinishFunction();
2566  Dtor = Fn;
2567  }
2568  // Do not emit init function if it is not required.
2569  if (!Ctor && !Dtor)
2570  return nullptr;
2571 
2572  llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2573  auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2574  /*isVarArg=*/false)
2575  ->getPointerTo();
2576  // Copying constructor for the threadprivate variable.
2577  // Must be NULL: the parameter is reserved by the runtime, which currently
2578  // requires it to always be NULL and asserts otherwise.
2579  CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2580  if (Ctor == nullptr) {
2581  auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2582  /*isVarArg=*/false)
2583  ->getPointerTo();
2584  Ctor = llvm::Constant::getNullValue(CtorTy);
2585  }
2586  if (Dtor == nullptr) {
2587  auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2588  /*isVarArg=*/false)
2589  ->getPointerTo();
2590  Dtor = llvm::Constant::getNullValue(DtorTy);
2591  }
2592  if (!CGF) {
2593  auto *InitFunctionTy =
2594  llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2595  std::string Name = getName({"__omp_threadprivate_init_", ""});
2596  llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2597  InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2598  CodeGenFunction InitCGF(CGM);
2599  FunctionArgList ArgList;
2600  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2601  CGM.getTypes().arrangeNullaryFunction(), ArgList,
2602  Loc, Loc);
2603  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2604  InitCGF.FinishFunction();
2605  return InitFunction;
2606  }
2607  emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2608  }
2609  return nullptr;
2610 }
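// Roughly, for a C++ threadprivate variable this emits (when needed) a
// "__kmpc_global_ctor_" helper around the initializer and a
// "__kmpc_global_dtor_" helper around the destructor, then registers them via
//   __kmpc_threadprivate_register(&loc, &var, ctor, /*cctor=*/NULL, dtor);
// either from a standalone "__omp_threadprivate_init_" function (when no CGF
// is supplied) or inline in the current function.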
2611 
2612 /// Obtain information that uniquely identifies a target entry. This
2613 /// consists of the file and device IDs as well as line number associated with
2614 /// the relevant entry source location.
2615 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
2616  unsigned &DeviceID, unsigned &FileID,
2617  unsigned &LineNum) {
2618  SourceManager &SM = C.getSourceManager();
2619 
2620  // The location should always be valid and have a file ID (the user cannot
2621  // use #pragma directives in macros).
2622 
2623  assert(Loc.isValid() && "Source location is expected to be always valid.");
2624 
2625  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2626  assert(PLoc.isValid() && "Source location is expected to be always valid.");
2627 
2628  llvm::sys::fs::UniqueID ID;
2629  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2630  SM.getDiagnostics().Report(diag::err_cannot_open_file)
2631  << PLoc.getFilename() << EC.message();
2632 
2633  DeviceID = ID.getDevice();
2634  FileID = ID.getFile();
2635  LineNum = PLoc.getLine();
2636 }
2637 
2638 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
2639  llvm::GlobalVariable *Addr,
2640  bool PerformInit) {
2641  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
2642  isDeclareTargetDeclaration(VD);
2643  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link)
2644  return false;
2645  VD = VD->getDefinition(CGM.getContext());
2646  if (VD && !DeclareTargetWithDefinition.insert(VD).second)
2647  return CGM.getLangOpts().OpenMPIsDevice;
2648 
2649  QualType ASTTy = VD->getType();
2650 
2651  SourceLocation Loc = VD->getCanonicalDecl()->getLocStart();
2652  // Produce the unique prefix to identify the new target regions. We use
2653  // the source location of the variable declaration which we know to not
2654  // conflict with any target region.
2655  unsigned DeviceID;
2656  unsigned FileID;
2657  unsigned Line;
2658  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2659  SmallString<128> Buffer, Out;
2660  {
2661  llvm::raw_svector_ostream OS(Buffer);
2662  OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2663  << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2664  }
2665 
2666  const Expr *Init = VD->getAnyInitializer();
2667  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2668  llvm::Constant *Ctor;
2669  llvm::Constant *ID;
2670  if (CGM.getLangOpts().OpenMPIsDevice) {
2671  // Generate function that re-emits the declaration's initializer into
2672  // the threadprivate copy of the variable VD
2673  CodeGenFunction CtorCGF(CGM);
2674 
2675  const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2676  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2677  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2678  FTy, Twine(Buffer, "_ctor"), FI, Loc);
2679  auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2680  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2681  FunctionArgList(), Loc, Loc);
2682  auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2683  CtorCGF.EmitAnyExprToMem(Init,
2684  Address(Addr, CGM.getContext().getDeclAlign(VD)),
2685  Init->getType().getQualifiers(),
2686  /*IsInitializer=*/true);
2687  CtorCGF.FinishFunction();
2688  Ctor = Fn;
2689  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2690  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2691  } else {
2692  Ctor = new llvm::GlobalVariable(
2693  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2694  llvm::GlobalValue::PrivateLinkage,
2695  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2696  ID = Ctor;
2697  }
2698 
2699  // Register the information for the entry associated with the constructor.
2700  Out.clear();
2701  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2702  DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2703  ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
2704  }
2705  if (VD->getType().isDestructedType() != QualType::DK_none) {
2706  llvm::Constant *Dtor;
2707  llvm::Constant *ID;
2708  if (CGM.getLangOpts().OpenMPIsDevice) {
2709  // Generate function that emits destructor call for the threadprivate
2710  // copy of the variable VD
2711  CodeGenFunction DtorCGF(CGM);
2712 
2713  const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
2714  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2715  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2716  FTy, Twine(Buffer, "_dtor"), FI, Loc);
2717  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2718  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2719  FunctionArgList(), Loc, Loc);
2720  // Create a scope with an artificial location for the body of this
2721  // function.
2722  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2723  DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2724  ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2725  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2726  DtorCGF.FinishFunction();
2727  Dtor = Fn;
2728  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2729  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2730  } else {
2731  Dtor = new llvm::GlobalVariable(
2732  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2733  llvm::GlobalValue::PrivateLinkage,
2734  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2735  ID = Dtor;
2736  }
2737  // Register the information for the entry associated with the destructor.
2738  Out.clear();
2739  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2740  DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2741  ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2742  }
2743  return CGM.getLangOpts().OpenMPIsDevice;
2744 }
2745 
2746 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2747  QualType VarType,
2748  StringRef Name) {
2749  std::string Suffix = getName({"artificial", ""});
2750  std::string CacheSuffix = getName({"cache", ""});
2751  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2752  llvm::Value *GAddr =
2753  getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2754  llvm::Value *Args[] = {
2755  emitUpdateLocation(CGF, SourceLocation()),
2756  getThreadID(CGF, SourceLocation()),
2757  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2758  CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2759  /*IsSigned=*/false),
2760  getOrCreateInternalVariable(
2761  CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2762  return Address(
2763  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2764  CGF.EmitRuntimeCall(
2765  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2766  VarLVType->getPointerTo(/*AddrSpace=*/0)),
2767  CGM.getPointerAlign());
2768 }
2769 
2770 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2771  const RegionCodeGenTy &ThenGen,
2772  const RegionCodeGenTy &ElseGen) {
2773  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2774 
2775  // If the condition constant folds and can be elided, try to avoid emitting
2776  // the condition and the dead arm of the if/else.
2777  bool CondConstant;
2778  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2779  if (CondConstant)
2780  ThenGen(CGF);
2781  else
2782  ElseGen(CGF);
2783  return;
2784  }
2785 
2786  // Otherwise, the condition did not fold, or we couldn't elide it. Just
2787  // emit the conditional branch.
2788  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2789  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2790  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2791  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2792 
2793  // Emit the 'then' code.
2794  CGF.EmitBlock(ThenBlock);
2795  ThenGen(CGF);
2796  CGF.EmitBranch(ContBlock);
2797  // Emit the 'else' code if present.
2798  // There is no need to emit line number for unconditional branch.
2799  (void)ApplyDebugLocation::CreateEmpty(CGF);
2800  CGF.EmitBlock(ElseBlock);
2801  ElseGen(CGF);
2802  // There is no need to emit line number for unconditional branch.
2803  (void)ApplyDebugLocation::CreateEmpty(CGF);
2804  CGF.EmitBranch(ContBlock);
2805  // Emit the continuation block for code after the if.
2806  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2807 }
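// Example: for 'if(1)' the condition constant-folds and only ThenGen is
// emitted, with no branch at all; for a non-constant condition the helper
// wraps the two arms in omp_if.then / omp_if.else blocks that rejoin at
// omp_if.end.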
2808 
2809 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2810  llvm::Value *OutlinedFn,
2811  ArrayRef<llvm::Value *> CapturedVars,
2812  const Expr *IfCond) {
2813  if (!CGF.HaveInsertPoint())
2814  return;
2815  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2816  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2817  PrePostActionTy &) {
2818  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2819  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2820  llvm::Value *Args[] = {
2821  RTLoc,
2822  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2823  CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2824  llvm::SmallVector<llvm::Value *, 16> RealArgs;
2825  RealArgs.append(std::begin(Args), std::end(Args));
2826  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2827 
2828  llvm::Value *RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2829  CGF.EmitRuntimeCall(RTLFn, RealArgs);
2830  };
2831  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2832  PrePostActionTy &) {
2833  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2834  llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2835  // Build calls:
2836  // __kmpc_serialized_parallel(&Loc, GTid);
2837  llvm::Value *Args[] = {RTLoc, ThreadID};
2838  CGF.EmitRuntimeCall(
2839  RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2840 
2841  // OutlinedFn(&GTid, &zero, CapturedStruct);
2842  Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2843  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2844  /*Name*/ ".zero.addr");
2845  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2846  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2847  OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2848  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2849  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2850  RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2851 
2852  // __kmpc_end_serialized_parallel(&Loc, GTid);
2853  llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2854  CGF.EmitRuntimeCall(
2855  RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2856  EndArgs);
2857  };
2858  if (IfCond) {
2859  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2860  } else {
2861  RegionCodeGenTy ThenRCG(ThenGen);
2862  ThenRCG(CGF);
2863  }
2864 }
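// Sketch of the two paths above for '#pragma omp parallel' (pseudo-code,
// names illustrative):
//   if (<if-clause>) {
//     __kmpc_fork_call(&loc, /*argc=*/N, (kmpc_micro)OutlinedFn, var1, ..., varN);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     OutlinedFn(&gtid, &.zero.addr, var1, ..., varN);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }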
2865 
2866 // If we're inside an (outlined) parallel region, use the region info's
2867 // thread-ID variable (it is passed as the first argument of the outlined
2868 // function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2869 // region but in a regular serial code region, get the thread ID by calling
2870 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash it in a temporary
2871 // and return the address of that temp.
2872 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2873  SourceLocation Loc) {
2874  if (auto *OMPRegionInfo =
2875  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2876  if (OMPRegionInfo->getThreadIDVariable())
2877  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2878 
2879  llvm::Value *ThreadID = getThreadID(CGF, Loc);
2880  QualType Int32Ty =
2881  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2882  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2883  CGF.EmitStoreOfScalar(ThreadID,
2884  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2885 
2886  return ThreadIDTemp;
2887 }
2888 
2889 llvm::Constant *
2890 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2891  const llvm::Twine &Name) {
2892  SmallString<256> Buffer;
2893  llvm::raw_svector_ostream Out(Buffer);
2894  Out << Name;
2895  StringRef RuntimeName = Out.str();
2896  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2897  if (Elem.second) {
2898  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2899  "OMP internal variable has different type than requested");
2900  return &*Elem.second;
2901  }
2902 
2903  return Elem.second = new llvm::GlobalVariable(
2904  CGM.getModule(), Ty, /*IsConstant*/ false,
2905  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2906  Elem.first());
2907 }
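// getOrCreateInternalVariable hands out zero-initialized, common-linkage
// globals keyed by name; getCriticalRegionLock below uses it to materialize
// the "gomp_critical_user_<name>.var" lock (of type kmp_critical_name) that
// the __kmpc_critical / __kmpc_end_critical calls take.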
2908 
2909 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2910  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2911  std::string Name = getName({Prefix, "var"});
2912  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2913 }
2914 
2915 namespace {
2916 /// Common pre(post)-action for different OpenMP constructs.
2917 class CommonActionTy final : public PrePostActionTy {
2918  llvm::Value *EnterCallee;
2919  ArrayRef<llvm::Value *> EnterArgs;
2920  llvm::Value *ExitCallee;
2921  ArrayRef<llvm::Value *> ExitArgs;
2922  bool Conditional;
2923  llvm::BasicBlock *ContBlock = nullptr;
2924 
2925 public:
2926  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2927  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2928  bool Conditional = false)
2929  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2930  ExitArgs(ExitArgs), Conditional(Conditional) {}
2931  void Enter(CodeGenFunction &CGF) override {
2932  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2933  if (Conditional) {
2934  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2935  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2936  ContBlock = CGF.createBasicBlock("omp_if.end");
2937  // Generate the branch (If-stmt)
2938  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2939  CGF.EmitBlock(ThenBlock);
2940  }
2941  }
2942  void Done(CodeGenFunction &CGF) {
2943  // Emit the rest of blocks/branches
2944  CGF.EmitBranch(ContBlock);
2945  CGF.EmitBlock(ContBlock, true);
2946  }
2947  void Exit(CodeGenFunction &CGF) override {
2948  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2949  }
2950 };
2951 } // anonymous namespace
2952 
2953 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2954  StringRef CriticalName,
2955  const RegionCodeGenTy &CriticalOpGen,
2956  SourceLocation Loc, const Expr *Hint) {
2957  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2958  // CriticalOpGen();
2959  // __kmpc_end_critical(ident_t *, gtid, Lock);
2960  // Prepare arguments and build a call to __kmpc_critical
2961  if (!CGF.HaveInsertPoint())
2962  return;
2963  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2964  getCriticalRegionLock(CriticalName)};
2965  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2966  std::end(Args));
2967  if (Hint) {
2968  EnterArgs.push_back(CGF.Builder.CreateIntCast(
2969  CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2970  }
2971  CommonActionTy Action(
2972  createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2973  : OMPRTL__kmpc_critical),
2974  EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2975  CriticalOpGen.setAction(Action);
2976  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2977 }
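As a rough example (user code is illustrative; the lock variable name follows getCriticalRegionLock above), a critical construct is bracketed by the enter/exit calls sketched in the comment at the top of this function:

    //   #pragma omp critical (update)
    //   { ++counter; }
    //
    // Conceptual lowering:
    //   __kmpc_critical(&loc, gtid, &gomp_critical_user_update.var);
    //   ++counter;
    //   __kmpc_end_critical(&loc, gtid, &gomp_critical_user_update.var);
    //
    // With a hint() clause the enter call becomes __kmpc_critical_with_hint and
    // carries the evaluated hint as the extra argument pushed into EnterArgs.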
2978 
2979 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2980  const RegionCodeGenTy &MasterOpGen,
2981  SourceLocation Loc) {
2982  if (!CGF.HaveInsertPoint())
2983  return;
2984  // if(__kmpc_master(ident_t *, gtid)) {
2985  // MasterOpGen();
2986  // __kmpc_end_master(ident_t *, gtid);
2987  // }
2988  // Prepare arguments and build a call to __kmpc_master
2989  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2990  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2991  createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2992  /*Conditional=*/true);
2993  MasterOpGen.setAction(Action);
2994  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2995  Action.Done(CGF);
2996 }
2997 
2998 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2999  SourceLocation Loc) {
3000  if (!CGF.HaveInsertPoint())
3001  return;
3002  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3003  llvm::Value *Args[] = {
3004  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3005  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3006  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
3007  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3008  Region->emitUntiedSwitch(CGF);
3009 }
3010 
3011 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
3012  const RegionCodeGenTy &TaskgroupOpGen,
3013  SourceLocation Loc) {
3014  if (!CGF.HaveInsertPoint())
3015  return;
3016  // __kmpc_taskgroup(ident_t *, gtid);
3017  // TaskgroupOpGen();
3018  // __kmpc_end_taskgroup(ident_t *, gtid);
3019  // Prepare arguments and build a call to __kmpc_taskgroup
3020  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3021  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3022  createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
3023  Args);
3024  TaskgroupOpGen.setAction(Action);
3025  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3026 }
3027 
3028 /// Given an array of pointers to variables, project the address of a
3029 /// given variable.
3030 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
3031  unsigned Index, const VarDecl *Var) {
3032  // Pull out the pointer to the variable.
3033  Address PtrAddr =
3034  CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
3035  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3036 
3037  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3038  Addr = CGF.Builder.CreateElementBitCast(
3039  Addr, CGF.ConvertTypeForMem(Var->getType()));
3040  return Addr;
3041 }
3042 
3044  CodeGenModule &CGM, llvm::Type *ArgsType,
3045  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3046  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3047  SourceLocation Loc) {
3048  ASTContext &C = CGM.getContext();
3049  // void copy_func(void *LHSArg, void *RHSArg);
3050  FunctionArgList Args;
3051  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3053  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3055  Args.push_back(&LHSArg);
3056  Args.push_back(&RHSArg);
3057  const auto &CGFI =
3059  std::string Name =
3060  CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3061  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3063  &CGM.getModule());
3064  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3065  Fn->setDoesNotRecurse();
3066  CodeGenFunction CGF(CGM);
3067  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3068  // Dest = (void*[n])(LHSArg);
3069  // Src = (void*[n])(RHSArg);
3071  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3072  ArgsType), CGF.getPointerAlign());
3074  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3075  ArgsType), CGF.getPointerAlign());
3076  // *(Type0*)Dst[0] = *(Type0*)Src[0];
3077  // *(Type1*)Dst[1] = *(Type1*)Src[1];
3078  // ...
3079  // *(Typen*)Dst[n] = *(Typen*)Src[n];
3080  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3081  const auto *DestVar =
3082  cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3083  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3084 
3085  const auto *SrcVar =
3086  cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3087  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3088 
3089  const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3090  QualType Type = VD->getType();
3091  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3092  }
3093  CGF.FinishFunction();
3094  return Fn;
3095 }
3096 
3097 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
3098  const RegionCodeGenTy &SingleOpGen,
3099  SourceLocation Loc,
3100  ArrayRef<const Expr *> CopyprivateVars,
3101  ArrayRef<const Expr *> SrcExprs,
3102  ArrayRef<const Expr *> DstExprs,
3103  ArrayRef<const Expr *> AssignmentOps) {
3104  if (!CGF.HaveInsertPoint())
3105  return;
3106  assert(CopyprivateVars.size() == SrcExprs.size() &&
3107  CopyprivateVars.size() == DstExprs.size() &&
3108  CopyprivateVars.size() == AssignmentOps.size());
3109  ASTContext &C = CGM.getContext();
3110  // int32 did_it = 0;
3111  // if(__kmpc_single(ident_t *, gtid)) {
3112  // SingleOpGen();
3113  // __kmpc_end_single(ident_t *, gtid);
3114  // did_it = 1;
3115  // }
3116  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3117  // <copy_func>, did_it);
3118 
3119  Address DidIt = Address::invalid();
3120  if (!CopyprivateVars.empty()) {
3121  // int32 did_it = 0;
3122  QualType KmpInt32Ty =
3123  C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3124  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3125  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3126  }
3127  // Prepare arguments and build a call to __kmpc_single
3128  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3129  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3131  /*Conditional=*/true);
3132  SingleOpGen.setAction(Action);
3133  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3134  if (DidIt.isValid()) {
3135  // did_it = 1;
3136  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3137  }
3138  Action.Done(CGF);
3139  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3140  // <copy_func>, did_it);
3141  if (DidIt.isValid()) {
3142  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3143  QualType CopyprivateArrayTy =
3144  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3145  /*IndexTypeQuals=*/0);
3146  // Create a list of all private variables for copyprivate.
3147  Address CopyprivateList =
3148  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3149  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3150  Address Elem = CGF.Builder.CreateConstArrayGEP(
3151  CopyprivateList, I, CGF.getPointerSize());
3152  CGF.Builder.CreateStore(
3154  CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3155  Elem);
3156  }
3157  // Build function that copies private values from single region to all other
3158  // threads in the corresponding parallel region.
3160  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3161  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3162  llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3163  Address CL =
3164  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3165  CGF.VoidPtrTy);
3166  llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3167  llvm::Value *Args[] = {
3168  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3169  getThreadID(CGF, Loc), // i32 <gtid>
3170  BufSize, // size_t <buf_size>
3171  CL.getPointer(), // void *<copyprivate list>
3172  CpyFn, // void (*) (void *, void *) <copy_func>
3173  DidItVal // i32 did_it
3174  };
3176  }
3177 }
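A compact sketch of what the code above produces for a single construct with copyprivate (illustrative user code; the copy-function name is approximate and follows the getName() call used in this function):

    //   int x;
    //   #pragma omp single copyprivate(x)
    //   { x = compute(); }
    //
    // Conceptual lowering, following the comment at the top of this function:
    //   kmp_int32 did_it = 0;
    //   if (__kmpc_single(&loc, gtid)) {
    //     x = compute();
    //     __kmpc_end_single(&loc, gtid);
    //     did_it = 1;
    //   }
    //   void *cpr_list[1] = { &x };
    //   __kmpc_copyprivate(&loc, gtid, sizeof cpr_list, cpr_list,
    //                      omp.copyprivate.copy_func, did_it);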
3178 
3179 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
3180  const RegionCodeGenTy &OrderedOpGen,
3181  SourceLocation Loc, bool IsThreads) {
3182  if (!CGF.HaveInsertPoint())
3183  return;
3184  // __kmpc_ordered(ident_t *, gtid);
3185  // OrderedOpGen();
3186  // __kmpc_end_ordered(ident_t *, gtid);
3187  // Prepare arguments and build a call to __kmpc_ordered
3188  if (IsThreads) {
3189  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3190  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3192  Args);
3193  OrderedOpGen.setAction(Action);
3194  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3195  return;
3196  }
3197  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3198 }
3199 
3200 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
3201  OpenMPDirectiveKind Kind, bool EmitChecks,
3202  bool ForceSimpleCall) {
3203  if (!CGF.HaveInsertPoint())
3204  return;
3205  // Build call __kmpc_cancel_barrier(loc, thread_id);
3206  // Build call __kmpc_barrier(loc, thread_id);
3207  unsigned Flags;
3208  if (Kind == OMPD_for)
3209  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3210  else if (Kind == OMPD_sections)
3211  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3212  else if (Kind == OMPD_single)
3213  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3214  else if (Kind == OMPD_barrier)
3215  Flags = OMP_IDENT_BARRIER_EXPL;
3216  else
3217  Flags = OMP_IDENT_BARRIER_IMPL;
3218  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3219  // thread_id);
3220  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3221  getThreadID(CGF, Loc)};
3222  if (auto *OMPRegionInfo =
3223  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3224  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3225  llvm::Value *Result = CGF.EmitRuntimeCall(
3227  if (EmitChecks) {
3228  // if (__kmpc_cancel_barrier()) {
3229  // exit from construct;
3230  // }
3231  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3232  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3233  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3234  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3235  CGF.EmitBlock(ExitBB);
3236  // exit from construct;
3237  CodeGenFunction::JumpDest CancelDestination =
3238  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3239  CGF.EmitBranchThroughCleanup(CancelDestination);
3240  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3241  }
3242  return;
3243  }
3244  }
3246 }
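In short, the function above chooses between two runtime entry points; a sketch of the cancellable case (control flow per the comment inside the if above):

    //   if (__kmpc_cancel_barrier(&loc, gtid)) {
    //     // branch to the cancellation exit of the enclosing construct
    //   }
    //   // .cancel.continue:
    //
    // When the region has no cancel (or ForceSimpleCall is set), a plain
    //   __kmpc_barrier(&loc, gtid);
    // is emitted instead.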
3247 
3248 /// Map the OpenMP loop schedule to the runtime enumeration.
3249 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
3250  bool Chunked, bool Ordered) {
3251  switch (ScheduleKind) {
3252  case OMPC_SCHEDULE_static:
3253  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3254  : (Ordered ? OMP_ord_static : OMP_sch_static);
3255  case OMPC_SCHEDULE_dynamic:
3257  case OMPC_SCHEDULE_guided:
3259  case OMPC_SCHEDULE_runtime:
3260  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3261  case OMPC_SCHEDULE_auto:
3262  return Ordered ? OMP_ord_auto : OMP_sch_auto;
3263  case OMPC_SCHEDULE_unknown:
3264  assert(!Chunked && "chunk was specified but schedule kind not known");
3265  return Ordered ? OMP_ord_static : OMP_sch_static;
3266  }
3267  llvm_unreachable("Unexpected runtime schedule");
3268 }
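A few concrete mappings implied by the switch above (a sketch; dynamic and guided return their chunked variants on the lines elided from this listing):

    // schedule(static)      -> OMP_sch_static          (OMP_ord_static when ordered)
    // schedule(static, 4)   -> OMP_sch_static_chunked  (OMP_ord_static_chunked when ordered)
    // schedule(runtime)     -> OMP_sch_runtime         (OMP_ord_runtime when ordered)
    // schedule(auto)        -> OMP_sch_auto            (OMP_ord_auto when ordered)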
3269 
3270 /// Map the OpenMP distribute schedule to the runtime enumeration.
3271 static OpenMPSchedType
3273  // only static is allowed for dist_schedule
3275 }
3276 
3278  bool Chunked) const {
3279  OpenMPSchedType Schedule =
3280  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3281  return Schedule == OMP_sch_static;
3282 }
3283 
3285  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3286  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3287  return Schedule == OMP_dist_sch_static;
3288 }
3289 
3290 
3292  OpenMPSchedType Schedule =
3293  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3294  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3295  return Schedule != OMP_sch_static;
3296 }
3297 
3301  int Modifier = 0;
3302  switch (M1) {
3303  case OMPC_SCHEDULE_MODIFIER_monotonic:
3304  Modifier = OMP_sch_modifier_monotonic;
3305  break;
3306  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3307  Modifier = OMP_sch_modifier_nonmonotonic;
3308  break;
3309  case OMPC_SCHEDULE_MODIFIER_simd:
3310  if (Schedule == OMP_sch_static_chunked)
3312  break;
3315  break;
3316  }
3317  switch (M2) {
3318  case OMPC_SCHEDULE_MODIFIER_monotonic:
3319  Modifier = OMP_sch_modifier_monotonic;
3320  break;
3321  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3322  Modifier = OMP_sch_modifier_nonmonotonic;
3323  break;
3324  case OMPC_SCHEDULE_MODIFIER_simd:
3325  if (Schedule == OMP_sch_static_chunked)
3327  break;
3330  break;
3331  }
3332  return Schedule | Modifier;
3333 }
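The returned value is simply the schedule enumerator OR-ed with the modifier bit; for example (a sketch using the enumerators named above):

    // schedule(monotonic: static, 4)
    //   -> OMP_sch_static_chunked | OMP_sch_modifier_monotonic
    // schedule(nonmonotonic: dynamic)
    //   -> (dynamic schedule enumerator) | OMP_sch_modifier_nonmonotonic
    //
    // The combined integer is what gets passed as the schedule-type argument of
    // __kmpc_dispatch_init_* and __kmpc_for_static_init_* below.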
3334 
3336  CodeGenFunction &CGF, SourceLocation Loc,
3337  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3338  bool Ordered, const DispatchRTInput &DispatchValues) {
3339  if (!CGF.HaveInsertPoint())
3340  return;
3342  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3343  assert(Ordered ||
3344  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3345  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3346  Schedule != OMP_sch_static_balanced_chunked));
3347  // Call __kmpc_dispatch_init(
3348  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3349  // kmp_int[32|64] lower, kmp_int[32|64] upper,
3350  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
3351 
3352  // If the Chunk was not specified in the clause - use default value 1.
3353  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3354  : CGF.Builder.getIntN(IVSize, 1);
3355  llvm::Value *Args[] = {
3356  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3357  CGF.Builder.getInt32(addMonoNonMonoModifier(
3358  Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3359  DispatchValues.LB, // Lower
3360  DispatchValues.UB, // Upper
3361  CGF.Builder.getIntN(IVSize, 1), // Stride
3362  Chunk // Chunk
3363  };
3364  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3365 }
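For a dynamically scheduled loop, the init call above is paired with __kmpc_dispatch_next_* (emitted later in emitForNext); a sketch for a 32-bit signed induction variable, with illustrative user code:

    //   #pragma omp for schedule(dynamic, 8)
    //   for (int i = 0; i < n; ++i) body(i);
    //
    // Conceptual lowering:
    //   __kmpc_dispatch_init_4(&loc, gtid, sched, /*lb=*/0, /*ub=*/n - 1,
    //                          /*st=*/1, /*chunk=*/8);
    //   while (__kmpc_dispatch_next_4(&loc, gtid, &last, &lo, &hi, &st))
    //     for (int i = lo; i <= hi; i += st) body(i);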
3366 
3368  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3369  llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
3371  const CGOpenMPRuntime::StaticRTInput &Values) {
3372  if (!CGF.HaveInsertPoint())
3373  return;
3374 
3375  assert(!Values.Ordered);
3376  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3377  Schedule == OMP_sch_static_balanced_chunked ||
3378  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3379  Schedule == OMP_dist_sch_static ||
3380  Schedule == OMP_dist_sch_static_chunked);
3381 
3382  // Call __kmpc_for_static_init(
3383  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3384  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3385  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3386  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3387  llvm::Value *Chunk = Values.Chunk;
3388  if (Chunk == nullptr) {
3389  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3390  Schedule == OMP_dist_sch_static) &&
3391  "expected static non-chunked schedule");
3392  // If the Chunk was not specified in the clause - use default value 1.
3393  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3394  } else {
3395  assert((Schedule == OMP_sch_static_chunked ||
3396  Schedule == OMP_sch_static_balanced_chunked ||
3397  Schedule == OMP_ord_static_chunked ||
3398  Schedule == OMP_dist_sch_static_chunked) &&
3399  "expected static chunked schedule");
3400  }
3401  llvm::Value *Args[] = {
3402  UpdateLocation,
3403  ThreadId,
3404  CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3405  M2)), // Schedule type
3406  Values.IL.getPointer(), // &isLastIter
3407  Values.LB.getPointer(), // &LB
3408  Values.UB.getPointer(), // &UB
3409  Values.ST.getPointer(), // &Stride
3410  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3411  Chunk // Chunk
3412  };
3413  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3414 }
3415 
3417  SourceLocation Loc,
3418  OpenMPDirectiveKind DKind,
3419  const OpenMPScheduleTy &ScheduleKind,
3420  const StaticRTInput &Values) {
3421  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3422  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3423  assert(isOpenMPWorksharingDirective(DKind) &&
3424  "Expected loop-based or sections-based directive.");
3425  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3426  isOpenMPLoopDirective(DKind)
3427  ? OMP_IDENT_WORK_LOOP
3428  : OMP_IDENT_WORK_SECTIONS);
3429  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3430  llvm::Constant *StaticInitFunction =
3432  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3433  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3434 }
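The static counterpart: a sketch of the init/fini pair produced around a statically scheduled worksharing loop (illustrative user code; argument meanings follow the comment block in emitForStaticInitCall above):

    //   #pragma omp for schedule(static)
    //   for (int i = 0; i < n; ++i) body(i);
    //
    // Conceptual lowering:
    //   __kmpc_for_static_init_4(&loc, gtid, OMP_sch_static, &last,
    //                            &lb, &ub, &st, /*incr=*/1, /*chunk=*/1);
    //   for (int i = lb; i <= ub; ++i) body(i);
    //   // (codegen additionally clamps ub against the global upper bound)
    //   __kmpc_for_static_fini(&loc, gtid);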
3435 
3437  CodeGenFunction &CGF, SourceLocation Loc,
3438  OpenMPDistScheduleClauseKind SchedKind,
3439  const CGOpenMPRuntime::StaticRTInput &Values) {
3440  OpenMPSchedType ScheduleNum =
3441  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3442  llvm::Value *UpdatedLocation =
3443  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3444  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3445  llvm::Constant *StaticInitFunction =
3446  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3447  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3448  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3450 }
3451 
3453  SourceLocation Loc,
3454  OpenMPDirectiveKind DKind) {
3455  if (!CGF.HaveInsertPoint())
3456  return;
3457  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3458  llvm::Value *Args[] = {
3459  emitUpdateLocation(CGF, Loc,
3461  ? OMP_IDENT_WORK_DISTRIBUTE
3462  : isOpenMPLoopDirective(DKind)
3463  ? OMP_IDENT_WORK_LOOP
3464  : OMP_IDENT_WORK_SECTIONS),
3465  getThreadID(CGF, Loc)};
3467  Args);
3468 }
3469 
3471  SourceLocation Loc,
3472  unsigned IVSize,
3473  bool IVSigned) {
3474  if (!CGF.HaveInsertPoint())
3475  return;
3476  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3477  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3478  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3479 }
3480 
3482  SourceLocation Loc, unsigned IVSize,
3483  bool IVSigned, Address IL,
3484  Address LB, Address UB,
3485  Address ST) {
3486  // Call __kmpc_dispatch_next(
3487  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3488  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3489  // kmp_int[32|64] *p_stride);
3490  llvm::Value *Args[] = {
3491  emitUpdateLocation(CGF, Loc),
3492  getThreadID(CGF, Loc),
3493  IL.getPointer(), // &isLastIter
3494  LB.getPointer(), // &Lower
3495  UB.getPointer(), // &Upper
3496  ST.getPointer() // &Stride
3497  };
3498  llvm::Value *Call =
3499  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3500  return CGF.EmitScalarConversion(
3501  Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3502  CGF.getContext().BoolTy, Loc);
3503 }
3504 
3506  llvm::Value *NumThreads,
3507  SourceLocation Loc) {
3508  if (!CGF.HaveInsertPoint())
3509  return;
3510  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3511  llvm::Value *Args[] = {
3512  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3513  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3515  Args);
3516 }
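Sketch of the effect (illustrative user code): a num_threads clause becomes a push call that precedes the fork of the parallel region:

    //   #pragma omp parallel num_threads(8)
    //   { work(); }
    //
    //   __kmpc_push_num_threads(&loc, gtid, 8);
    //   __kmpc_fork_call(&loc, ..., outlined_fn, ...);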
3517 
3519  OpenMPProcBindClauseKind ProcBind,
3520  SourceLocation Loc) {
3521  if (!CGF.HaveInsertPoint())
3522  return;
3523  // Constants for proc bind value accepted by the runtime.
3524  enum ProcBindTy {
3525  ProcBindFalse = 0,
3526  ProcBindTrue,
3527  ProcBindMaster,
3528  ProcBindClose,
3529  ProcBindSpread,
3530  ProcBindIntel,
3531  ProcBindDefault
3532  } RuntimeProcBind;
3533  switch (ProcBind) {
3534  case OMPC_PROC_BIND_master:
3535  RuntimeProcBind = ProcBindMaster;
3536  break;
3537  case OMPC_PROC_BIND_close:
3538  RuntimeProcBind = ProcBindClose;
3539  break;
3540  case OMPC_PROC_BIND_spread:
3541  RuntimeProcBind = ProcBindSpread;
3542  break;
3544  llvm_unreachable("Unsupported proc_bind value.");
3545  }
3546  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3547  llvm::Value *Args[] = {
3548  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3549  llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3551 }
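Given the ProcBindTy enumerator values above, the emitted push call encodes the clause as a small integer (a sketch):

    // proc_bind(master) -> __kmpc_push_proc_bind(&loc, gtid, /*ProcBindMaster=*/2)
    // proc_bind(close)  -> __kmpc_push_proc_bind(&loc, gtid, /*ProcBindClose=*/3)
    // proc_bind(spread) -> __kmpc_push_proc_bind(&loc, gtid, /*ProcBindSpread=*/4)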
3552 
3553 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3554  SourceLocation Loc) {
3555  if (!CGF.HaveInsertPoint())
3556  return;
3557  // Build call void __kmpc_flush(ident_t *loc)
3559  emitUpdateLocation(CGF, Loc));
3560 }
3561 
3562 namespace {
3563 /// Indexes of fields for type kmp_task_t.
3565  /// List of shared variables.
3566  KmpTaskTShareds,
3567  /// Task routine.
3568  KmpTaskTRoutine,
3569  /// Partition id for the untied tasks.
3570  KmpTaskTPartId,
3571  /// Function with call of destructors for private variables.
3572  Data1,
3573  /// Task priority.
3574  Data2,
3575  /// (Taskloops only) Lower bound.
3576  KmpTaskTLowerBound,
3577  /// (Taskloops only) Upper bound.
3578  KmpTaskTUpperBound,
3579  /// (Taskloops only) Stride.
3580  KmpTaskTStride,
3581  /// (Taskloops only) Is last iteration flag.
3582  KmpTaskTLastIter,
3583  /// (Taskloops only) Reduction data.
3584  KmpTaskTReductions,
3585 };
3586 } // anonymous namespace
3587 
3588 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3589  return OffloadEntriesTargetRegion.empty() &&
3590  OffloadEntriesDeviceGlobalVar.empty();
3591 }
3592 
3593 /// Initialize target region entry.
3594 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3595  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3596  StringRef ParentName, unsigned LineNum,
3597  unsigned Order) {
3598  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3599  "only required for the device "
3600  "code generation.");
3601  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3602  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3603  OMPTargetRegionEntryTargetRegion);
3604  ++OffloadingEntriesNum;
3605 }
3606 
3607 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3608  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3609  StringRef ParentName, unsigned LineNum,
3610  llvm::Constant *Addr, llvm::Constant *ID,
3611  OMPTargetRegionEntryKind Flags) {
3612  // If we are emitting code for a target, the entry is already initialized;
3613  // it only has to be registered.
3614  if (CGM.getLangOpts().OpenMPIsDevice) {
3615  if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3616  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3618  "Unable to find target region on line '%0' in the device code.");
3619  CGM.getDiags().Report(DiagID) << LineNum;
3620  return;
3621  }
3622  auto &Entry =
3623  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3624  assert(Entry.isValid() && "Entry not initialized!");
3625  Entry.setAddress(Addr);
3626  Entry.setID(ID);
3627  Entry.setFlags(Flags);
3628  } else {
3629  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3630  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3631  ++OffloadingEntriesNum;
3632  }
3633 }
3634 
3635 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3636  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3637  unsigned LineNum) const {
3638  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3639  if (PerDevice == OffloadEntriesTargetRegion.end())
3640  return false;
3641  auto PerFile = PerDevice->second.find(FileID);
3642  if (PerFile == PerDevice->second.end())
3643  return false;
3644  auto PerParentName = PerFile->second.find(ParentName);
3645  if (PerParentName == PerFile->second.end())
3646  return false;
3647  auto PerLine = PerParentName->second.find(LineNum);
3648  if (PerLine == PerParentName->second.end())
3649  return false;
3650  // Fail if this entry is already registered.
3651  if (PerLine->second.getAddress() || PerLine->second.getID())
3652  return false;
3653  return true;
3654 }
3655 
3656 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3657  const OffloadTargetRegionEntryInfoActTy &Action) {
3658  // Scan all target region entries and perform the provided action.
3659  for (const auto &D : OffloadEntriesTargetRegion)
3660  for (const auto &F : D.second)
3661  for (const auto &P : F.second)
3662  for (const auto &L : P.second)
3663  Action(D.first, F.first, P.first(), L.first, L.second);
3664 }
3665 
3666 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3667  initializeDeviceGlobalVarEntryInfo(StringRef Name,
3668  OMPTargetGlobalVarEntryKind Flags,
3669  unsigned Order) {
3670  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3671  "only required for the device "
3672  "code generation.");
3673  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3674  ++OffloadingEntriesNum;
3675 }
3676 
3677 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3678  registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3679  CharUnits VarSize,
3680  OMPTargetGlobalVarEntryKind Flags,
3681  llvm::GlobalValue::LinkageTypes Linkage) {
3682  if (CGM.getLangOpts().OpenMPIsDevice) {
3683  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3684  assert(Entry.isValid() && Entry.getFlags() == Flags &&
3685  "Entry not initialized!");
3686  assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3687  "Resetting with the new address.");
3688  if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName))
3689  return;
3690  Entry.setAddress(Addr);
3691  Entry.setVarSize(VarSize);
3692  Entry.setLinkage(Linkage);
3693  } else {
3694  if (hasDeviceGlobalVarEntryInfo(VarName))
3695  return;
3696  OffloadEntriesDeviceGlobalVar.try_emplace(
3697  VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3698  ++OffloadingEntriesNum;
3699  }
3700 }
3701 
3702 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3703  actOnDeviceGlobalVarEntriesInfo(
3704  const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3705  // Scan all target region entries and perform the provided action.
3706  for (const auto &E : OffloadEntriesDeviceGlobalVar)
3707  Action(E.getKey(), E.getValue());
3708 }
3709 
3710 llvm::Function *
3712  // If we don't have entries or if we are emitting code for the device, we
3713  // don't need to do anything.
3714  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3715  return nullptr;
3716 
3717  llvm::Module &M = CGM.getModule();
3718  ASTContext &C = CGM.getContext();
3719 
3720  // Get list of devices we care about
3721  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3722 
3723  // We should be creating an offloading descriptor only if there are devices
3724  // specified.
3725  assert(!Devices.empty() && "No OpenMP offloading devices??");
3726 
3727  // Create the external variables that will point to the begin and end of the
3728  // host entries section. These will be defined by the linker.
3729  llvm::Type *OffloadEntryTy =
3731  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3732  auto *HostEntriesBegin = new llvm::GlobalVariable(
3733  M, OffloadEntryTy, /*isConstant=*/true,
3734  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3735  EntriesBeginName);
3736  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3737  auto *HostEntriesEnd =
3738  new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3740  /*Initializer=*/nullptr, EntriesEndName);
3741 
3742  // Create all device images
3743  auto *DeviceImageTy = cast<llvm::StructType>(
3745  ConstantInitBuilder DeviceImagesBuilder(CGM);
3746  ConstantArrayBuilder DeviceImagesEntries =
3747  DeviceImagesBuilder.beginArray(DeviceImageTy);
3748 
3749  for (const llvm::Triple &Device : Devices) {
3750  StringRef T = Device.getTriple();
3751  std::string BeginName = getName({"omp_offloading", "img_start", ""});
3752  auto *ImgBegin = new llvm::GlobalVariable(
3753  M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3754  /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3755  std::string EndName = getName({"omp_offloading", "img_end", ""});
3756  auto *ImgEnd = new llvm::GlobalVariable(
3757  M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3758  /*Initializer=*/nullptr, Twine(EndName).concat(T));
3759 
3760  llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3761  HostEntriesEnd};
3763  DeviceImagesEntries);
3764  }
3765 
3766  // Create device images global array.
3767  std::string ImagesName = getName({"omp_offloading", "device_images"});
3768  llvm::GlobalVariable *DeviceImages =
3769  DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3770  CGM.getPointerAlign(),
3771  /*isConstant=*/true);
3772  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3773 
3774  // This is a Zero array to be used in the creation of the constant expressions
3775  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3776  llvm::Constant::getNullValue(CGM.Int32Ty)};
3777 
3778  // Create the target region descriptor.
3779  llvm::Constant *Data[] = {
3780  llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3781  llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3782  DeviceImages, Index),
3783  HostEntriesBegin, HostEntriesEnd};
3784  std::string Descriptor = getName({"omp_offloading", "descriptor"});
3785  llvm::GlobalVariable *Desc = createConstantGlobalStruct(
3786  CGM, getTgtBinaryDescriptorQTy(), Data, Descriptor);
3787 
3788  // Emit code to register or unregister the descriptor at execution
3789  // startup or closing, respectively.
3790 
3791  llvm::Function *UnRegFn;
3792  {
3793  FunctionArgList Args;
3795  Args.push_back(&DummyPtr);
3796 
3797  CodeGenFunction CGF(CGM);
3798  // Disable debug info for global (de-)initializer because they are not part
3799  // of some particular construct.
3800  CGF.disableDebugInfo();
3801  const auto &FI =
3803  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3804  std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
3805  UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
3806  CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
3808  Desc);
3809  CGF.FinishFunction();
3810  }
3811  llvm::Function *RegFn;
3812  {
3813  CodeGenFunction CGF(CGM);
3814  // Disable debug info for global (de-)initializer because they are not part
3815  // of some particular construct.
3816  CGF.disableDebugInfo();
3817  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
3818  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3819  std::string Descriptor = getName({"omp_offloading", "descriptor_reg"});
3820  RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
3821  CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
3823  // Create a variable to drive the registration and unregistration of the
3824  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3825  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
3826  SourceLocation(), nullptr, C.CharTy,
3828  CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3829  CGF.FinishFunction();
3830  }
3831  if (CGM.supportsCOMDAT()) {
3832  // It is sufficient to call registration function only once, so create a
3833  // COMDAT group for registration/unregistration functions and associated
3834  // data. That would reduce startup time and code size. Registration
3835  // function serves as a COMDAT group key.
3836  llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
3837  RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3838  RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3839  RegFn->setComdat(ComdatKey);
3840  UnRegFn->setComdat(ComdatKey);
3841  DeviceImages->setComdat(ComdatKey);
3842  Desc->setComdat(ComdatKey);
3843  }
3844  return RegFn;
3845 }
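Conceptually, the two functions built above behave like a global constructor/destructor pair that hands the descriptor to the offload runtime (a sketch; the actual registration calls are emitted through the runtime-function helpers on the lines elided from this listing, and the function names follow the getName() calls above):

    //   __attribute__((constructor)) static void omp_offloading.descriptor_reg() {
    //     __tgt_register_lib(&omp_offloading.descriptor);
    //   }
    //   __attribute__((destructor)) static void omp_offloading.descriptor_unreg() {
    //     __tgt_unregister_lib(&omp_offloading.descriptor);
    //   }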
3846 
3848  llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3849  llvm::GlobalValue::LinkageTypes Linkage) {
3850  StringRef Name = Addr->getName();
3851  llvm::Module &M = CGM.getModule();
3852  llvm::LLVMContext &C = M.getContext();
3853 
3854  // Create constant string with the name.
3855  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3856 
3857  std::string StringName = getName({"omp_offloading", "entry_name"});
3858  auto *Str = new llvm::GlobalVariable(
3859  M, StrPtrInit->getType(), /*isConstant=*/true,
3860  llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3861  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3862 
3863  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
3864  llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
3865  llvm::ConstantInt::get(CGM.SizeTy, Size),
3866  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3867  llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3868  std::string EntryName = getName({"omp_offloading", "entry", ""});
3869  llvm::GlobalVariable *Entry = createConstantGlobalStruct(
3870  CGM, getTgtOffloadEntryQTy(), Data, Twine(EntryName).concat(Name),
3871  llvm::GlobalValue::WeakAnyLinkage);
3872 
3873  // The entry has to be created in the section the linker expects it to be.
3874  std::string Section = getName({"omp_offloading", "entries"});
3875  Entry->setSection(Section);
3876 }
3877 
3879  // Emit the offloading entries and metadata so that the device codegen side
3880  // can easily figure out what to emit. The produced metadata looks like
3881  // this:
3882  //
3883  // !omp_offload.info = !{!1, ...}
3884  //
3885  // Right now we only generate metadata for functions that contain target
3886  // regions.
3887 
3888  // If we do not have entries, we don't need to do anything.
3890  return;
3891 
3892  llvm::Module &M = CGM.getModule();
3893  llvm::LLVMContext &C = M.getContext();
3895  OrderedEntries(OffloadEntriesInfoManager.size());
3896 
3897  // Auxiliary methods to create metadata values and strings.
3898  auto &&GetMDInt = [this](unsigned V) {
3899  return llvm::ConstantAsMetadata::get(
3900  llvm::ConstantInt::get(CGM.Int32Ty, V));
3901  };
3902 
3903  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3904 
3905  // Create the offloading info metadata node.
3906  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3907 
3908  // Create function that emits metadata for each target region entry;
3909  auto &&TargetRegionMetadataEmitter =
3910  [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
3911  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3912  unsigned Line,
3914  // Generate metadata for target regions. Each entry of this metadata
3915  // contains:
3916  // - Entry 0 -> Kind of this type of metadata (0).
3917  // - Entry 1 -> Device ID of the file where the entry was identified.
3918  // - Entry 2 -> File ID of the file where the entry was identified.
3919  // - Entry 3 -> Mangled name of the function where the entry was
3920  // identified.
3921  // - Entry 4 -> Line in the file where the entry was identified.
3922  // - Entry 5 -> Order the entry was created.
3923  // The first element of the metadata node is the kind.
3924  llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3925  GetMDInt(FileID), GetMDString(ParentName),
3926  GetMDInt(Line), GetMDInt(E.getOrder())};
3927 
3928  // Save this entry in the right position of the ordered entries array.
3929  OrderedEntries[E.getOrder()] = &E;
3930 
3931  // Add metadata to the named metadata node.
3932  MD->addOperand(llvm::MDNode::get(C, Ops));
3933  };
3934 
3936  TargetRegionMetadataEmitter);
3937 
3938  // Create function that emits metadata for each device global variable entry;
3939  auto &&DeviceGlobalVarMetadataEmitter =
3940  [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3941  MD](StringRef MangledName,
3943  &E) {
3944  // Generate metadata for global variables. Each entry of this metadata
3945  // contains:
3946  // - Entry 0 -> Kind of this type of metadata (1).
3947  // - Entry 1 -> Mangled name of the variable.
3948  // - Entry 2 -> Declare target kind.
3949  // - Entry 3 -> Order the entry was created.
3950  // The first element of the metadata node is the kind.
3951  llvm::Metadata *Ops[] = {
3952  GetMDInt(E.getKind()), GetMDString(MangledName),
3953  GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3954 
3955  // Save this entry in the right position of the ordered entries array.
3956  OrderedEntries[E.getOrder()] = &E;
3957 
3958  // Add metadata to the named metadata node.
3959  MD->addOperand(llvm::MDNode::get(C, Ops));
3960  };
3961 
3963  DeviceGlobalVarMetadataEmitter);
3964 
3965  for (const auto *E : OrderedEntries) {
3966  assert(E && "All ordered entries must exist!");
3967  if (const auto *CE =
3968  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3969  E)) {
3970  if (!CE->getID() || !CE->getAddress()) {
3971  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3973  "Offloading entry for target region is incorect: either the "
3974  "address or the ID is invalid.");
3975  CGM.getDiags().Report(DiagID);
3976  continue;
3977  }
3978  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3979  CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3980  } else if (const auto *CE =
3981  dyn_cast<OffloadEntriesInfoManagerTy::
3982  OffloadEntryInfoDeviceGlobalVar>(E)) {
3983  if (!CE->getAddress()) {
3984  unsigned DiagID = CGM.getDiags().getCustomDiagID(
3986  "Offloading entry for declare target varible is inccorect: the "
3987  "address is invalid.");
3988  CGM.getDiags().Report(DiagID);
3989  continue;
3990  }
3991  createOffloadEntry(CE->getAddress(), CE->getAddress(),
3992  CE->getVarSize().getQuantity(), CE->getFlags(),
3993  CE->getLinkage());
3994  } else {
3995  llvm_unreachable("Unsupported entry kind.");
3996  }
3997  }
3998 }
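Putting the two emitters together, the named metadata ends up looking roughly like this in the host IR (a sketch with made-up IDs; field order follows the Entry 0..5 and Entry 0..3 comments above):

    // !omp_offload.info = !{!1, !2}
    // ; target region entry: kind=0, device-id, file-id, parent name, line, order
    // !1 = !{i32 0, i32 42, i32 7, !"_Z3foov", i32 123, i32 0}
    // ; declare target variable entry: kind=1, mangled name, flags, order
    // !2 = !{i32 1, !"global_var", i32 0, i32 1}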
3999 
4000 /// Loads all the offload entries information from the host IR
4001 /// metadata.
4003  // If we are in target mode, load the metadata from the host IR. This code has
4004  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
4005 
4006  if (!CGM.getLangOpts().OpenMPIsDevice)
4007  return;
4008 
4009  if (CGM.getLangOpts().OMPHostIRFile.empty())
4010  return;
4011 
4012  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4013  if (auto EC = Buf.getError()) {
4014  CGM.getDiags().Report(diag::err_cannot_open_file)
4015  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4016  return;
4017  }
4018 
4019  llvm::LLVMContext C;
4020  auto ME = expectedToErrorOrAndEmitErrors(
4021  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4022 
4023  if (auto EC = ME.getError()) {
4024  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4025  DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4026  CGM.getDiags().Report(DiagID)
4027  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4028  return;
4029  }
4030 
4031  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4032  if (!MD)
4033  return;
4034 
4035  for (llvm::MDNode *MN : MD->operands()) {
4036  auto &&GetMDInt = [MN](unsigned Idx) {
4037  auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4038  return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4039  };
4040 
4041  auto &&GetMDString = [MN](unsigned Idx) {
4042  auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4043  return V->getString();
4044  };
4045 
4046  switch (GetMDInt(0)) {
4047  default:
4048  llvm_unreachable("Unexpected metadata!");
4049  break;
4053  /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4054  /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4055  /*Order=*/GetMDInt(5));
4056  break;
4060  /*MangledName=*/GetMDString(1),
4061  static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4062  /*Flags=*/GetMDInt(2)),
4063  /*Order=*/GetMDInt(3));
4064  break;
4065  }
4066  }
4067 }
4068 
4070  if (!KmpRoutineEntryPtrTy) {
4071  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4072  ASTContext &C = CGM.getContext();
4073  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4075  KmpRoutineEntryPtrQTy = C.getPointerType(
4076  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4077  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4078  }
4079 }
4080 
4082  // Make sure the type of the entry is already created. This is the type we
4083  // have to create:
4084  // struct __tgt_offload_entry{
4085  // void *addr; // Pointer to the offload entry info.
4086  // // (function or global)
4087  // char *name; // Name of the function or global.
4088  // size_t size; // Size of the entry info (0 if it is a function).
4089  // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4090  // int32_t reserved; // Reserved, to use by the runtime library.
4091  // };
4092  if (TgtOffloadEntryQTy.isNull()) {
4093  ASTContext &C = CGM.getContext();
4094  RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4095  RD->startDefinition();
4096  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4098  addFieldToRecordDecl(C, RD, C.getSizeType());
4100  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4102  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4103  RD->completeDefinition();
4104  RD->addAttr(PackedAttr::CreateImplicit(C));
4106  }
4107  return TgtOffloadEntryQTy;
4108 }
4109 
4111  // These are the types we need to build:
4112  // struct __tgt_device_image{
4113  // void *ImageStart; // Pointer to the target code start.
4114  // void *ImageEnd; // Pointer to the target code end.
4115  // // We also add the host entries to the device image, as it may be useful
4116  // // for the target runtime to have access to that information.
4117  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
4118  // // the entries.
4119  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4120  // // entries (non inclusive).
4121  // };
4122  if (TgtDeviceImageQTy.isNull()) {
4123  ASTContext &C = CGM.getContext();
4124  RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4125  RD->startDefinition();
4126  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4127  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4130  RD->completeDefinition();
4132  }
4133  return TgtDeviceImageQTy;
4134 }
4135 
4137  // struct __tgt_bin_desc{
4138  // int32_t NumDevices; // Number of devices supported.
4139  // __tgt_device_image *DeviceImages; // Arrays of device images
4140  // // (one per device).
4141  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
4142  // // entries.
4143  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4144  // // entries (non inclusive).
4145  // };
4147  ASTContext &C = CGM.getContext();
4148  RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4149  RD->startDefinition();
4151  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4155  RD->completeDefinition();
4157  }
4158  return TgtBinaryDescriptorQTy;
4159 }
4160 
4161 namespace {
4162 struct PrivateHelpersTy {
4163  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4164  const VarDecl *PrivateElemInit)
4165  : Original(Original), PrivateCopy(PrivateCopy),
4166  PrivateElemInit(PrivateElemInit) {}
4167  const VarDecl *Original;
4168  const VarDecl *PrivateCopy;
4169  const VarDecl *PrivateElemInit;
4170 };
4171 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4172 } // anonymous namespace
4173 
4174 static RecordDecl *
4176  if (!Privates.empty()) {
4177  ASTContext &C = CGM.getContext();
4178  // Build struct .kmp_privates_t. {
4179  // /* private vars */
4180  // };
4181  RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4182  RD->startDefinition();
4183  for (const auto &Pair : Privates) {
4184  const VarDecl *VD = Pair.second.Original;
4185  QualType Type = VD->getType().getNonReferenceType();
4186  FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
4187  if (VD->hasAttrs()) {
4188  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4189  E(VD->getAttrs().end());
4190  I != E; ++I)
4191  FD->addAttr(*I);
4192  }
4193  }
4194  RD->completeDefinition();
4195  return RD;
4196  }
4197  return nullptr;
4198 }
4199 
4200 static RecordDecl *
4202  QualType KmpInt32Ty,
4203  QualType KmpRoutineEntryPointerQTy) {
4204  ASTContext &C = CGM.getContext();
4205  // Build struct kmp_task_t {
4206  // void * shareds;
4207  // kmp_routine_entry_t routine;
4208  // kmp_int32 part_id;
4209  // kmp_cmplrdata_t data1;
4210  // kmp_cmplrdata_t data2;
4211  // For taskloops additional fields:
4212  // kmp_uint64 lb;
4213  // kmp_uint64 ub;
4214  // kmp_int64 st;
4215  // kmp_int32 liter;
4216  // void * reductions;
4217  // };
4218  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4219  UD->startDefinition();
4220  addFieldToRecordDecl(C, UD, KmpInt32Ty);
4221  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4222  UD->completeDefinition();
4223  QualType KmpCmplrdataTy = C.getRecordType(UD);
4224  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4225  RD->startDefinition();
4226  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4227  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4228  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4229  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4230  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4231  if (isOpenMPTaskLoopDirective(Kind)) {
4232  QualType KmpUInt64Ty =
4233  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4234  QualType KmpInt64Ty =
4235  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4236  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4237  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4238  addFieldToRecordDecl(C, RD, KmpInt64Ty);
4239  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4240  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4241  }
4242  RD->completeDefinition();
4243  return RD;
4244 }
4245 
4246 static RecordDecl *
4248  ArrayRef<PrivateDataTy> Privates) {
4249  ASTContext &C = CGM.getContext();
4250  // Build struct kmp_task_t_with_privates {
4251  // kmp_task_t task_data;
4252  // .kmp_privates_t. privates;
4253  // };
4254  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4255  RD->startDefinition();
4256  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4257  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4258  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4259  RD->completeDefinition();
4260  return RD;
4261 }
4262 
4263 /// Emit a proxy function which accepts kmp_task_t as the second
4264 /// argument.
4265 /// \code
4266 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4267 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4268 /// For taskloops:
4269 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4270 /// tt->reductions, tt->shareds);
4271 /// return 0;
4272 /// }
4273 /// \endcode
4274 static llvm::Value *
4276  OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4277  QualType KmpTaskTWithPrivatesPtrQTy,
4278  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4279  QualType SharedsPtrTy, llvm::Value *TaskFunction,
4280  llvm::Value *TaskPrivatesMap) {
4281  ASTContext &C = CGM.getContext();
4282  FunctionArgList Args;
4283  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4285  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4286  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4288  Args.push_back(&GtidArg);
4289  Args.push_back(&TaskTypeArg);
4290  const auto &TaskEntryFnInfo =
4291  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4292  llvm::FunctionType *TaskEntryTy =
4293  CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
4294  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4295  auto *TaskEntry = llvm::Function::Create(
4296  TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4297  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4298  TaskEntry->setDoesNotRecurse();
4299  CodeGenFunction CGF(CGM);
4300  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4301  Loc, Loc);
4302 
4303  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4304  // tt,
4305  // For taskloops:
4306  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4307  // tt->task_data.shareds);
4308  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4309  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
4310  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4311  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4312  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4313  const auto *KmpTaskTWithPrivatesQTyRD =
4314  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4315  LValue Base =
4316  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4317  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4318  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4319  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4320  llvm::Value *PartidParam = PartIdLVal.getPointer();
4321 
4322  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4323  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4325  CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4326  CGF.ConvertTypeForMem(SharedsPtrTy));
4327 
4328  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4329  llvm::Value *PrivatesParam;
4330  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4331  LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4332  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4333  PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4334  } else {
4335  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4336  }
4337 
4338  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4339  TaskPrivatesMap,
4340  CGF.Builder
4342  TDBase.getAddress(), CGF.VoidPtrTy)
4343  .getPointer()};
4344  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4345  std::end(CommonArgs));
4346  if (isOpenMPTaskLoopDirective(Kind)) {
4347  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4348  LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4349  llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4350  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4351  LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4352  llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4353  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4354  LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4355  llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4356  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4357  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4358  llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4359  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4360  LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4361  llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4362  CallArgs.push_back(LBParam);
4363  CallArgs.push_back(UBParam);
4364  CallArgs.push_back(StParam);
4365  CallArgs.push_back(LIParam);
4366  CallArgs.push_back(RParam);
4367  }
4368  CallArgs.push_back(SharedsParam);
4369 
4370  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4371  CallArgs);
4372  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4373  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4374  CGF.FinishFunction();
4375  return TaskEntry;
4376 }
4377 
4379  SourceLocation Loc,
4380  QualType KmpInt32Ty,
4381  QualType KmpTaskTWithPrivatesPtrQTy,
4382  QualType KmpTaskTWithPrivatesQTy) {
4383  ASTContext &C = CGM.getContext();
4384  FunctionArgList Args;
4385  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4387  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4388  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4390  Args.push_back(&GtidArg);
4391  Args.push_back(&TaskTypeArg);
4392  const auto &DestructorFnInfo =
4393  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4394  llvm::FunctionType *DestructorFnTy =
4395  CGM.getTypes().GetFunctionType(DestructorFnInfo);
4396  std::string Name =
4397  CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4398  auto *DestructorFn =
4400  Name, &CGM.getModule());
4401  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4402  DestructorFnInfo);
4403  DestructorFn->setDoesNotRecurse();
4404  CodeGenFunction CGF(CGM);
4405  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4406  Args, Loc, Loc);
4407 
4409  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4410  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4411  const auto *KmpTaskTWithPrivatesQTyRD =
4412  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4413  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4414  Base = CGF.EmitLValueForField(Base, *FI);
4415  for (const auto *Field :
4416  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
4417  if (QualType::DestructionKind DtorKind =
4418  Field->getType().isDestructedType()) {
4419  LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4420  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4421  }
4422  }
4423  CGF.FinishFunction();
4424  return DestructorFn;
4425 }
4426 
4427 /// Emit a privates mapping function for correct handling of private and
4428 /// firstprivate variables.
4429 /// \code
4430 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4431 /// **noalias priv1,..., <tyn> **noalias privn) {
4432 /// *priv1 = &.privates.priv1;
4433 /// ...;
4434 /// *privn = &.privates.privn;
4435 /// }
4436 /// \endcode
4437 static llvm::Value *
4439  ArrayRef<const Expr *> PrivateVars,
4440  ArrayRef<const Expr *> FirstprivateVars,
4441  ArrayRef<const Expr *> LastprivateVars,
4442  QualType PrivatesQTy,
4443  ArrayRef<PrivateDataTy> Privates) {
4444  ASTContext &C = CGM.getContext();
4445  FunctionArgList Args;
4446  ImplicitParamDecl TaskPrivatesArg(
4447  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4448  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4450  Args.push_back(&TaskPrivatesArg);
4451  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4452  unsigned Counter = 1;
4453  for (const Expr *E : PrivateVars) {
4454  Args.push_back(ImplicitParamDecl::Create(
4455  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4457  .withConst()
4458  .withRestrict(),
4460  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4461  PrivateVarsPos[VD] = Counter;
4462  ++Counter;
4463  }
4464  for (const Expr *E : FirstprivateVars) {
4465  Args.push_back(ImplicitParamDecl::Create(
4466  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4468  .withConst()
4469  .withRestrict(),
4471  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4472  PrivateVarsPos[VD] = Counter;
4473  ++Counter;
4474  }
4475  for (const Expr *E : LastprivateVars) {
4476  Args.push_back(ImplicitParamDecl::Create(
4477  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4478  C.getPointerType(C.getPointerType(E->getType()))
4479  .withConst()
4480  .withRestrict(),
4481  ImplicitParamDecl::Other));
4482  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4483  PrivateVarsPos[VD] = Counter;
4484  ++Counter;
4485  }
4486  const auto &TaskPrivatesMapFnInfo =
4487  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4488  llvm::FunctionType *TaskPrivatesMapTy =
4489  CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4490  std::string Name =
4491  CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4492  auto *TaskPrivatesMap = llvm::Function::Create(
4493  TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4494  &CGM.getModule());
4495  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4496  TaskPrivatesMapFnInfo);
4497  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4498  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4499  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4500  CodeGenFunction CGF(CGM);
4501  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4502  TaskPrivatesMapFnInfo, Args, Loc, Loc);
4503 
4504  // *privi = &.privates.privi;
4505  LValue Base = CGF.EmitLoadOfPointerLValue(
4506  CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4507  TaskPrivatesArg.getType()->castAs<PointerType>());
4508  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
4509  Counter = 0;
4510  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4511  LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
4512  const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4513  LValue RefLVal =
4514  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4515  LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4516  RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4517  CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4518  ++Counter;
4519  }
4520  CGF.FinishFunction();
4521  return TaskPrivatesMap;
4522 }
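// Note: the map function built above is not called here. Its address is
// handed to the task entry point emitted elsewhere in this file, which calls
// it roughly as "omp_task_privates_map(&tt->privates, &p1, ..., &pn)" to
// recover the addresses of the privatized variables before the task body
// runs (a simplified description, not verbatim emitted IR).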
4523 
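/// Comparator for stable_sort of the collected privates: orders entries by
/// decreasing alignment (the pair's 'first' member holds the alignment), so
/// the most strictly aligned privates come first in the generated privates
/// record.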
4524 static bool stable_sort_comparator(const PrivateDataTy P1,
4525  const PrivateDataTy P2) {
4526  return P1.first > P2.first;
4527 }
4528 
4529 /// Emit initialization for private variables in task-based directives.
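/// Roughly (names illustrative): for 'firstprivate(a)' the loop below emits
/// the equivalent of
/// \code
/// task_privates->a = shareds->a; // memcpy for trivially copyable arrays,
///                                // element-wise / copy-construction otherwise
/// \endcode
/// while 'private' copies that have a non-trivial initializer are
/// default-initialized in place.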
4530 static void emitPrivatesInit(CodeGenFunction &CGF,
4531  const OMPExecutableDirective &D,
4532  Address KmpTaskSharedsPtr, LValue TDBase,
4533  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4534  QualType SharedsTy, QualType SharedsPtrTy,
4535  const OMPTaskDataTy &Data,
4536  ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4537  ASTContext &C = CGF.getContext();
4538  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4539  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4540  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
4541  ? OMPD_taskloop
4542  : OMPD_task;
4543  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4544  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4545  LValue SrcBase;
4546  bool IsTargetTask =
4547  isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
4548  isOpenMPTargetExecutionDirective(D.getDirectiveKind());
4549  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4550  // PointersArray and SizesArray. The original variables for these arrays are
4551  // not captured and we get their addresses explicitly.
4552  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4553  (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4554  SrcBase = CGF.MakeAddrLValue(
4555  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4556  KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4557  SharedsTy);
4558  }
4559  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4560  for (const PrivateDataTy &Pair : Privates) {
4561  const VarDecl *VD = Pair.second.PrivateCopy;
4562  const Expr *Init = VD->getAnyInitializer();
4563  if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4564  !CGF.isTrivialInitializer(Init)))) {
4565  LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4566  if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4567  const VarDecl *OriginalVD = Pair.second.Original;
4568  // Check if the variable is the target-based BasePointersArray,
4569  // PointersArray or SizesArray.
4570  LValue SharedRefLValue;
4571  QualType Type = OriginalVD->getType();
4572  const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4573  if (IsTargetTask && !SharedField) {
4574  assert(isa<ImplicitParamDecl>(OriginalVD) &&
4575  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4576  cast<CapturedDecl>(OriginalVD->getDeclContext())
4577  ->getNumParams() == 0 &&
4578  isa<TranslationUnitDecl>(
4579  cast<CapturedDecl>(OriginalVD->getDeclContext())
4580  ->getDeclContext()) &&
4581  "Expected artificial target data variable.");
4582  SharedRefLValue =
4583  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4584  } else {
4585  SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4586  SharedRefLValue = CGF.MakeAddrLValue(
4587  Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4588  SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4589  SharedRefLValue.getTBAAInfo());
4590  }
4591  if (Type->isArrayType()) {
4592  // Initialize firstprivate array.
4593  if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4594  // Perform simple memcpy.
4595  CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4596  } else {
4597  // Initialize firstprivate array using element-by-element
4598  // initialization.
4599  CGF.EmitOMPAggregateAssign(
4600  PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4601  [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4602  Address SrcElement) {
4603  // Clean up any temporaries needed by the initialization.
4604  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4605  InitScope.addPrivate(
4606  Elem, [SrcElement]() -> Address { return SrcElement; });
4607  (void)InitScope.Privatize();
4608  // Emit initialization for single element.
4609  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
4610  CGF, &CapturesInfo);
4611  CGF.EmitAnyExprToMem(Init, DestElement,
4612  Init->getType().getQualifiers(),
4613  /*IsInitializer=*/false);
4614  });
4615  }
4616  } else {
4617  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4618  InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4619  return SharedRefLValue.getAddress();
4620  });
4621  (void)InitScope.Privatize();
4622  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4623  CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4624  /*capturedByInit=*/false);
4625  }
4626  } else {
4627  CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4628  }
4629  }
4630  ++FI;
4631  }
4632 }
4633 
4634 /// Check if duplication function is required for taskloops.
4635 static bool checkInitIsRequired(CodeGenFunction &CGF,
4636  ArrayRef<PrivateDataTy> Privates) {
4637  bool InitRequired = false;
4638  for (const PrivateDataTy &Pair :